diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 6fdff914f31774496c3709b92b7ffe940dd26642..33f72ecb941c410f7541f32541a34878ff987b3f 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -10,7 +10,7 @@
         "notskm.clang-tidy",
         "streetsidesoftware.code-spell-checker"
     ],
-    "runArgs": [//"--gpus","all",                     // remove this line in case you have no gpus available
+    "runArgs": ["--gpus","all",                     // remove this line in case you have no gpus available
                 "--hostname=${localEnv:HOSTNAME}"], // HOSTNAME needs to be known by the vscode environment. It is probably necessary to add "export HOSTNAME=<hostname>" to the config file of your host machine's bash.
 
     "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.4"
diff --git a/.gitignore b/.gitignore
index e4e47aa4b705c9035394f5ea15c0d5e653859206..3b236ea5bdd793ed34603010c520fa1f1a43e34b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,8 +9,10 @@ buildGCC
 _skbuild/
 dist/
 *.egg-info/
-__pycache__/
+**/__pycache__/
 .venv/
+pythonbindings/pyfluids/bindings*
+pythonbindings/pymuparser/bindings*
 
 # IDE
 .vscode/
@@ -38,4 +40,7 @@ stl/
 .DS_Store
 
 # Settings
-.gitconfig
\ No newline at end of file
+.gitconfig
+
+# User Settings
+CMakeUserPresets.json
\ No newline at end of file
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b34c5a8f66c1340670b6acd80ea6a9901b2760d1..e171e2e7fbe1984588355f5a833a21160024da32 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,7 @@
 ###############################################################################
 ##                       VirtualFluids CI Pipeline                           ##
 ###############################################################################
-image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.3
+image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.4
 
 stages:
   - build
@@ -49,7 +49,7 @@ stages:
     - cd $CI_PROJECT_DIR/$BUILD_FOLDER
     - rm -r -f ./*
     - cmake .. -LAH
-      --preset=all_make
+      --preset=make_all
       -DBUILD_WARNINGS_AS_ERRORS=ON
       -DCMAKE_CUDA_ARCHITECTURES=60
     - make -j4
@@ -75,7 +75,7 @@ clang_10:
     - export CXX=clang++
 
 ###############################################################################
-msvc_16:
+msvc_17:
   stage: build
 
   tags:
@@ -92,14 +92,14 @@ msvc_16:
     - git --version
     - $env:Path += ";C:\Program Files\CMake\bin\"
     - cmake --version
-    - $env:Path += ";C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin"
+    - $env:Path += ";C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin"
     - MSBuild.exe -version
 
   script:
     - cd $CI_PROJECT_DIR
     - md -force $env:BUILD_FOLDER
     - cd $env:BUILD_FOLDER
-    - cmake .. --preset=all_msvc -DCMAKE_CUDA_ARCHITECTURES=61 -DBUILD_WARNINGS_AS_ERRORS=ON
+    - cmake .. --preset=msvc_all -DCMAKE_CUDA_ARCHITECTURES=61 -DBUILD_WARNINGS_AS_ERRORS=ON
     - MSBuild.exe VirtualFluids.sln /property:Configuration=$env:BUILD_CONFIGURATION /verbosity:minimal /maxcpucount:4
 
   artifacts:
@@ -126,33 +126,44 @@ gcc_9_python:
     paths:
       - build/
       - dist/
+      - _skbuild/
 
   before_script:
     - export CCACHE_BASEDIR=$CI_PROJECT_DIR
     - export CCACHE_DIR=$CI_PROJECT_DIR/cache
 
   script:
-    - python3 setup.py bdist_wheel build_ext --build-temp=build
+    - python3 setup.py bdist_wheel build_ext --build-temp=_skbuild -- -DBUILD_VF_CPU=ON -DBUILD_VF_DOUBLE_ACCURACY=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache
 
 ###############################################################################
 ##                            Container Upload                               ##
 ###############################################################################
-build_singularity_image:
+build_poiseuille_test_container:
+  image: 
+    name: quay.io/singularity/singularity:v3.10.2
+    entrypoint: [""]
+
   stage: container_upload
 
-  needs:
-    - gcc_9_python
+  rules:
+    - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule"
+      when: always
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+      when: never
+    - when: manual
+      allow_failure: true
 
   tags:
     - linux
     - privileged
 
-  rules:
-    - if: $CI_COMMIT_TAG
+  artifacts:
+    expire_in: 1 hrs
+    paths:
+      - Containers/PoiseuilleTestContainer.sif
 
   script:
-    - singularity build Containers/VirtualFluidsPython.sif Containers/VirtualFluidsPython.def
-    - singularity push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Containers/VirtualFluidsPython.sif oras://"$CI_REGISTRY_IMAGE"/"$CI_PROJECT_NAME":"$CI_COMMIT_TAG"
+    - singularity build "Containers/PoiseuilleTestContainer.sif" "Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def"
 
 ###############################################################################
 ##                                Tests                                      ##
@@ -169,14 +180,14 @@ gcc_9_unit_tests:
     - ctest
 
 ###############################################################################
-msvc_16_unit_tests:
+msvc_17_unit_tests:
   stage: test
 
   tags:
     - win
     - gpu
 
-  needs: ["msvc_16"]
+  needs: ["msvc_17"]
 
   before_script:
     - $env:Path += ";C:\Program Files\CMake\bin\"
@@ -202,6 +213,52 @@ gcc_9_python_bindings_test:
     - python3 -m unittest discover -s Python -v
 
 
+###############################################################################
+gcc_9_python_hpc_test:  # launches the Poiseuille SLURM test described in Python/SlurmTests/poiseuille/rocket.yml
+  image: python:latest
+  stage: test
+
+  needs: ["build_poiseuille_test_container"]  # start as soon as the container build job has finished
+
+  rules:
+    - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule"
+      when: always  # scheduled pipeline with all remote credentials set: run automatically
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+      when: never  # never part of merge request pipelines
+    - when: manual  # any other pipeline: only when triggered by hand
+      allow_failure: true  # keeps the manual job optional for the rest of the pipeline
+
+  before_script:
+    - pip install hpc-rocket
+
+  script:
+    - hpc-rocket launch --watch Python/SlurmTests/poiseuille/rocket.yml
+
+###############################################################################
+multigpu_hpc_test:  # launches the multi-GPU run via hpc-rocket, then compares its .vtu output with files from the test_data repository
+  image: python:latest
+  stage: test
+
+  rules:
+    - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule"
+      when: always  # scheduled pipeline with all remote credentials set: run automatically
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+      when: never  # never part of merge request pipelines
+    - when: manual  # any other pipeline: only when triggered by hand
+      allow_failure: true  # keeps the manual job optional for the rest of the pipeline
+
+  before_script:
+    - pip install hpc-rocket
+    - pip install "fieldcompare[all]"
+
+  script:
+    - hpc-rocket launch --watch regression-tests/multigpu_test/rocket.yml  # NOTE(review): presumably this is what places results in output/results - confirm in that rocket.yml
+    - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data  # shallow, blob-less, sparse clone
+    - cd test_data
+    - git sparse-checkout set regression_tests/gpu/DrivenCavity_4GPU_2Levels  # check out only the directory used in the comparison below
+    - cd ..
+    - fieldcompare dir output/results test_data/regression_tests/gpu/DrivenCavity_4GPU_2Levels --include-files "*.vtu"
+
 ###############################################################################
 ##                            Benchmark                                      ##
 ###############################################################################
@@ -248,7 +305,7 @@ gpu_numerical_tests:
     - cd $CI_PROJECT_DIR/build
     - rm -r -f ./*
     - cmake ..
-      --preset=gpu_numerical_tests_make
+      --preset=make_numerical_tests_gpu
       -DCMAKE_CUDA_ARCHITECTURES=60
       -DPATH_NUMERICAL_TESTS=/tmp/test_data/numerical_tests_gpu
     - make -j4
@@ -319,8 +376,7 @@ clang_build_analyzer_clang_10:
     - mkdir -p $CI_PROJECT_DIR/build
     - cd $CI_PROJECT_DIR/build
     - cmake ..
-      -DBUILD_VF_CPU=ON
-      -DBUILD_VF_GPU=ON
+      --preset=make_all
       -DCMAKE_CUDA_ARCHITECTURES=60
       -DCMAKE_CXX_FLAGS=-ftime-trace
     - ClangBuildAnalyzer --start .
@@ -352,8 +408,7 @@ include_what_you_use_clang_10:
     - mkdir -p $CI_PROJECT_DIR/build
     - cd $CI_PROJECT_DIR/build
     - cmake ..
-      -DBUILD_VF_CPU=ON
-      -DBUILD_VF_GPU=ON
+      --preset=make_all
       -DCMAKE_CUDA_ARCHITECTURES=60
       -DBUILD_VF_INCLUDE_WHAT_YOU_USE=ON
     - make
@@ -430,7 +485,7 @@ gcov_gcc_9:
     - mkdir -p $CI_PROJECT_DIR/build
     - cd $CI_PROJECT_DIR/build
     - cmake ..
-      --preset=all_make
+      --preset=make_all
       -DCMAKE_CUDA_ARCHITECTURES=60
       -DBUILD_VF_COVERAGE=ON
     - make -j4
@@ -473,6 +528,7 @@ clang-tidy:
     - cd $CI_PROJECT_DIR/build
     - cmake ..
       -DBUILD_VF_CPU=ON
+      -DBUILD_VF_DOUBLE_ACCURACY=ON
       -DBUILD_VF_GPU=OFF
     - python3 ../utilities/filterCompileCommands.py compile_commands.json
     - run-clang-tidy -quiet > clangtidy.txt
@@ -604,26 +660,3 @@ sonar-scanner:
   script:
     - cd $CI_PROJECT_DIR
     - sonar-scanner -X -Dsonar.verbose=true -Dsonar.login=$SONAR_SECURITY_TOKEN
-
-###############################################################################
-##                              Release                                      ##
-###############################################################################
-create_release:
-  stage: release
-
-  image: registry.gitlab.com/gitlab-org/release-cli:latest
-
-  needs: ["build_singularity_image"]
-
-  rules:
-    - if: $CI_COMMIT_TAG
-
-  script:
-    - echo "Creating release with tag $CI_COMMIT_TAG"
-    - release-cli create --name "VirtualFluids $CI_COMMIT_TAG" \
-      --description "VirtualFluids CFD Simulator" \
-      --tag-name "$CI_COMMIT_TAG" \
-      --ref "$CI_COMMIT_SHA" \
-      --job-token "$CI_JOB_TOKEN" \
-      --assets-link="{'name':'VirtualFluidsSingularityImage_OpenMPI','url':'','type':'other','filepath':'Containers/VirtualFluidsOpenMPI.sif'}"
-    - build/bin/basicsTests
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000000000000000000000000000000000000..50d4989d5c269521392644515d716fa93b3cf6e3
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,40 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+type: software
+authors:
+  - family-names: Kutscher
+    given-names: Konstantin
+    orcid: https://orcid.org/0000-0002-1099-1608
+  - family-names: Schönherr
+    given-names: Martin
+    orcid: https://orcid.org/0000-0002-4774-1776
+  - family-names: Geier
+    given-names: Martin
+    orcid: https://orcid.org/0000-0002-8367-9412
+  - family-names: Krafczyk
+    given-names: Manfred
+    orcid: https://orcid.org/0000-0002-8509-0871
+  - family-names: Alihussein
+    given-names: Hussein
+    orcid: https://orcid.org/0000-0003-3656-7028
+  - family-names: Linxweiler
+    given-names: Jan
+    orcid: https://orcid.org/0000-0002-2755-5087
+  - family-names: Peters
+    given-names: Sören
+    orcid: https://orcid.org/0000-0001-5236-3776
+  - family-names: Wellmann
+    given-names: Anna
+    orcid: https://orcid.org/0000-0002-8825-2995
+  - family-names: Safari
+    given-names: Hesameddin
+    orcid: https://orcid.org/0000-0002-2755-5087
+  - family-names: Marcus
+    given-names: Sven
+    orcid: https://orcid.org/0000-0003-3689-2162
+title: "VirtualFluids"
+version: 0.1.0
+license: GPL-3.0-or-later
+repository-code: "https://git.rz.tu-bs.de/irmb/VirtualFluids"
+date-released: "XXXXXXX"
+
diff --git a/CMake/FileUtilities.cmake b/CMake/FileUtilities.cmake
index 151000a681795923d4e31ed8c5f06dfd1e7af7fd..13057ef832b5aa2d7ce303fe55e95a91284f5f56 100644
--- a/CMake/FileUtilities.cmake
+++ b/CMake/FileUtilities.cmake
@@ -5,7 +5,7 @@
 ## After function call the files are stored in: MY_SRCS
 #################################################################################
 
-macro(includeAllFiles targetName file_path)
+macro(includeAllFiles folderName targetName file_path)
 	if(NOT DEFINED collectTestFiles)
 	    set(collectTestFiles ON)
 	endif()
@@ -14,11 +14,11 @@ macro(includeAllFiles targetName file_path)
         set(collectProductionFiles ON)
     endif()
 
-	includeFiles(${targetName} "${file_path}")
+	includeFiles(${folderName} ${targetName} "${file_path}")
 endmacro(includeAllFiles)
 
 
-macro(includeProductionFiles targetName file_path)
+macro(includeProductionFiles folderName targetName file_path)
 	if(NOT DEFINED collectTestFiles)
 	    set(collectTestFiles OFF)
 	endif()
@@ -27,12 +27,12 @@ macro(includeProductionFiles targetName file_path)
         set(collectProductionFiles ON)
     endif()
 
-	includeFiles(${targetName} "${file_path}")
+	includeFiles(${folderName}  ${targetName} "${file_path}")
 endmacro(includeProductionFiles)
 
 
 
-macro(includeTestFiles targetName file_paths)
+macro(includeTestFiles folderName file_paths)
 	if(NOT DEFINED collectTestFiles)
 		set(collectTestFiles ON)
 	endif()
@@ -41,13 +41,13 @@ macro(includeTestFiles targetName file_paths)
 		set(collectProductionFiles OFF)
 	endif()
 
-	includeFiles(${targetName} "${file_paths}")
+	includeFiles(${folderName} ${folderName} "${file_paths}")
 endmacro(includeTestFiles)
 
 
 
 
-macro(includeFiles targetName file_paths)
+macro(includeFiles folderName targetName file_paths)
 
 	foreach(file ${file_paths})
 
@@ -57,7 +57,7 @@ macro(includeFiles targetName file_paths)
 
 		collectFilesFrom(${file})
 		if (package_dir)
-		   setSourceGroupForFilesIn(${file} ${package_dir} ${targetName})
+		   setSourceGroupForFilesIn(${file} ${package_dir} ${targetName} ${folderName})
 		endif()
 
 	endforeach()
@@ -90,9 +90,9 @@ endmacro()
 
 
 
-macro(setSourceGroupForFilesIn file package_dir targetName)
+macro(setSourceGroupForFilesIn file package_dir targetName folderName)
 #input: target_name PACKAGE_SRCS
-	buildSourceGroup(${targetName} ${package_dir})
+	buildSourceGroup(${folderName} ${package_dir})
 
 	if(isAllTestSuite)
 		source_group(${targetName}\\${SOURCE_GROUP} FILES ${file})
@@ -105,20 +105,20 @@ endmacro(setSourceGroupForFilesIn)
 
 
 
-macro(buildSourceGroup targetName path)
-#input: targetName (e.g. lib name, exe name)
+macro(buildSourceGroup folderName path)
+#input: folderName (e.g. name of folder after src/)
 
 	unset(SOURCE_GROUP)
 	string(REPLACE "/" ";" folderListFromPath ${path})
-	set(findTargetName 0)
+	set(findFolderName 0)
 
 	foreach(folder ${folderListFromPath})
-		if(findTargetName)
+		if(findFolderName)
 			set(SOURCE_GROUP ${SOURCE_GROUP}\\${folder})
 		endif()
 
-		if(${folder} STREQUAL ${targetName})
-			SET(findTargetName 1)
+		if(${folder} STREQUAL ${folderName})
+			SET(findFolderName 1)
 		endif()
 	endforeach()
 
diff --git a/CMake/VirtualFluidsMacros.cmake b/CMake/VirtualFluidsMacros.cmake
index 63503f5f14221bb8cec7670dbdda6aa92497d327..4fd163b2cc1b53fe461ef482d906f4cb1255a76c 100644
--- a/CMake/VirtualFluidsMacros.cmake
+++ b/CMake/VirtualFluidsMacros.cmake
@@ -105,14 +105,15 @@ function(vf_add_library)
 
     set( options )
     set( oneValueArgs NAME BUILDTYPE)
-    set( multiValueArgs PUBLIC_LINK PRIVATE_LINK FILES FOLDER EXCLUDE)
+    set( multiValueArgs PUBLIC_LINK PRIVATE_LINK FILES FOLDER EXCLUDE MODULEFOLDER)
     cmake_parse_arguments( ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
 
-    if(DEFINED ARG_NAME)
+    if(DEFINED ARG_NAME) 
         set(library_name ${ARG_NAME})
     else()
         vf_get_library_name (library_name)
     endif()
+    vf_get_library_name (folder_name) # folder_name is not equal to library_name when ARG_NAME was set
 
     if(NOT DEFINED ARG_BUILDTYPE)
         if(BUILD_SHARED_LIBS)
@@ -122,12 +123,16 @@ function(vf_add_library)
         endif()
     endif()
 
+    if(DEFINED ARG_MODULEFOLDER)
+        set(folder_name ${ARG_MODULEFOLDER})
+    endif()
+
     status("Configuring the target: ${library_name} (type=${ARG_BUILDTYPE})...")
 
 
     collectFiles(sourceFiles "${ARG_FILES}" "${ARG_FOLDER}" "${ARG_EXCLUDE}")
 
-    includeProductionFiles (${library_name} "${sourceFiles}")
+    includeProductionFiles (${folder_name} ${library_name} "${sourceFiles}")
 
     #################################################################
     ###   ADD TARGET                                              ###
@@ -325,4 +330,4 @@ function(vf_load_user_apps)
     foreach(app IN LISTS USER_APPS)
       add_subdirectory(${app})
     endforeach()
-endfunction()
\ No newline at end of file
+endfunction()
diff --git a/CMake/cmake_config_files/MOLLOK.config.cmake b/CMake/cmake_config_files/MOLLOK.config.cmake
index f700f3cd7a4b5669ef6ffee9436a1528e50e9dc9..72470da1bc52a242cb8e3c341e0e7f87bb06ab26 100644
--- a/CMake/cmake_config_files/MOLLOK.config.cmake
+++ b/CMake/cmake_config_files/MOLLOK.config.cmake
@@ -12,4 +12,5 @@ set(PATH_NUMERICAL_TESTS "D:/out/numericalTests/")
 list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
 
 # add invidual apps here
-list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB")
\ No newline at end of file
+list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB")
+list(APPEND USER_APPS "apps/gpu/LBM/TGV_3D_GridRef")
diff --git a/CMake/cmake_config_files/MULE.config.cmake b/CMake/cmake_config_files/MULE.config.cmake
index 02f61b7988c5b3af9cd58bc52e46b1b2edfe8aae..2afbce6cc257fa0b8ff4dd7de580cb50c01369f1 100644
--- a/CMake/cmake_config_files/MULE.config.cmake
+++ b/CMake/cmake_config_files/MULE.config.cmake
@@ -1 +1,4 @@
-SET(CMAKE_CUDA_ARCHITECTURES "75")
\ No newline at end of file
+SET(CMAKE_CUDA_ARCHITECTURES "75")
+
+list(APPEND USER_APPS "apps/gpu/LBM/ActuatorLine")
+list(APPEND USER_APPS "apps/gpu/LBM/SphereScaling")
diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake
index d31d8684a53a769e48408ad5febe7d2c6b22c623..5ca4d9821d918f66745fc27363975811dc278440 100644
--- a/CMake/cmake_config_files/PHOENIX.config.cmake
+++ b/CMake/cmake_config_files/PHOENIX.config.cmake
@@ -28,7 +28,7 @@ set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100
 
 set(GPU_APP "apps/gpu/LBM/")
 list(APPEND USER_APPS 
-    # "${GPU_APP}DrivenCavityMultiGPU"
+    "${GPU_APP}DrivenCavityMultiGPU"
     # "${GPU_APP}SphereScaling"
     # "${GPU_APP}MusselOyster"
     )
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3af407acd66ec3223f55de7753df879786ce561..c6498bf19bb021f3ae19d69c4131aa56476149be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,7 +9,7 @@
 cmake_minimum_required(VERSION 3.15..3.20 FATAL_ERROR)
 
 project(VirtualFluids
-        VERSION 1.0.0
+        VERSION 0.1.0
         DESCRIPTION "CFD code based on the Lattice Boltzmann Method"
         HOMEPAGE_URL "https://www.tu-braunschweig.de/irmb/forschung/virtualfluids"
         LANGUAGES CXX)
diff --git a/CMakePresets.json b/CMakePresets.json
index 0f360fd303cdcad923b01d56df5c6d48ad62ca2c..6e2658d148bddf55950e5849adcf10709a8b8caf 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -1,34 +1,47 @@
 {
-    "version": 2,
+    "version": 3,
     "cmakeMinimumRequired": {
         "major": 3,
-        "minor": 20,
+        "minor": 21,
         "patch": 0
     },
     "configurePresets": [
         {
             "name": "default",
+            "binaryDir": "build",
+            "hidden": true
+        },
+        {
+            "name": "msvc",
             "hidden": true,
-            "binaryDir": "${sourceDir}/build/",
-            "cacheVariables": {
-                "BUILD_VF_UNIT_TESTS": "ON"
-            }
+            "generator": "Visual Studio 17 2022",
+            "architecture": "x64",
+            "condition": {
+                "type": "equals",
+                "lhs": "${hostSystemName}",
+                "rhs": "Windows"
+              }
         },
         {
-            "name": "default_make",
-            "inherits": "default",
+            "name": "make",
             "hidden": true,
-            "generator": "Unix Makefiles"
+            "generator": "Unix Makefiles",
+            "condition": {
+                "type": "notEquals",
+                "lhs": "${hostSystemName}",
+                "rhs": "Windows"
+              }
         },
         {
-            "name": "default_msvc",
-            "inherits": "default",
+            "name": "unit_tests",
             "hidden": true,
-            "generator": "Visual Studio 16 2019",
-            "architecture": "x64"
+            "cacheVariables": {
+                "BUILD_VF_UNIT_TESTS": "ON"
+            }
         },
         {
-            "name": "default_cpu",
+            "name": "cpu",
+            "inherits": "default",
             "hidden": true,
             "description": "CPU build of VirtualFluids",
             "cacheVariables": {
@@ -37,7 +50,8 @@
             }
         },
         {
-            "name": "default_gpu",
+            "name": "gpu",
+            "inherits": "default",
             "hidden": true,
             "description": "GPU build of VirtualFluids",
             "cacheVariables": {
@@ -46,9 +60,10 @@
             }
         },
         {
-            "name": "default_gpu_numerical_tests",
+            "name": "gpu_numerical_tests",
             "inherits": [
-                "default_gpu"
+                "gpu",
+                "unit_tests"
             ],
             "hidden": true,
             "description": "GPU numerical tests of VirtualFluids",
@@ -58,78 +73,74 @@
             }
         },
         {
-            "name": "default_all",
-            "hidden": true,
-            "description": "All build of VirtualFluids",
+            "name": "make_all",
             "inherits": [
-                "default_cpu",
-                "default_gpu"
+                "cpu",
+                "gpu",
+                "unit_tests",
+                "make"
             ],
-            "cacheVariables": {
-                "BUILD_VF_DOUBLE_ACCURACY": "ON"
-            }
+            "displayName": "all make configuration"
         },
         {
-            "name": "cpu_make",
+            "name": "make_cpu",
             "inherits": [
-                "default_make",
-                "default_cpu"
+                "cpu",
+                "unit_tests",
+                "make"
             ],
             "displayName": "cpu make configuration"
         },
         {
-            "name": "cpu_msvc",
-            "inherits": [
-                "default_msvc",
-                "default_cpu"
-            ],
-            "displayName": "cpu msvc configuration"
-        },
-        {
-            "name": "gpu_make",
+            "name": "make_gpu",
             "inherits": [
-                "default_make",
-                "default_gpu"
+                "gpu",
+                "unit_tests",
+                "make"
             ],
             "displayName": "gpu make configuration"
         },
         {
-            "name": "gpu_msvc",
+            "name": "msvc_all",
             "inherits": [
-                "default_msvc",
-                "default_gpu"
+                "cpu",
+                "gpu",
+                "unit_tests",
+                "msvc"
             ],
-            "displayName": "gpu msvc configuration"
+            "displayName": "all msvc configuration"
         },
         {
-            "name": "all_make",
+            "name": "msvc_cpu",
             "inherits": [
-                "default_make",
-                "default_all"
+                "cpu",
+                "unit_tests",
+                "msvc"
             ],
-            "displayName": "all make configuration"
+            "displayName": "cpu msvc configuration"
         },
         {
-            "name": "all_msvc",
+            "name": "msvc_gpu",
             "inherits": [
-                "default_msvc",
-                "default_all"
+                "gpu",
+                "unit_tests",
+                "msvc"
             ],
-            "displayName": "all msvc configuration"
+            "displayName": "gpu msvc configuration"
         },
         {
-            "name": "gpu_numerical_tests_make",
+            "name": "make_numerical_tests_gpu",
             "inherits": [
-                "default_make",
-                "default_gpu_numerical_tests"
+                "gpu_numerical_tests",
+                "make"
             ],
             "displayName": "gpu numerical tests make configuration"
         },
         {
-            "name": "gpu_numerical_tests_msvc",
+            "name": "msvc_numerical_tests_gpu",
             "inherits": [
-                "default_msvc",
-                "default_gpu_numerical_tests"
+                "msvc",
+                "gpu_numerical_tests"
             ],
             "displayName": "gpu numerical tests msvc configuration"
         }
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..adafcf99560acd9da79aa060194df8263b6e77e0
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include pythonbindings/*/bindings*
\ No newline at end of file
diff --git a/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
index a3836e7906b9be66ec79f68bf53ccc079db9d9ef..d31a7b82a4e9e988f815139fb46318d231d450f8 100644
--- a/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
+++ b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
@@ -1,11 +1,13 @@
 BootStrap: docker
 From: ubuntu:20.04
+Stage: build
 
 %files
     3rdParty 3rdParty
     apps apps
     CMake CMake
     Python Python
+    pythonbindings pythonbindings
     src src
     CMakeLists.txt CMakeLists.txt
     cpu.cmake cpu.cmake
@@ -19,7 +21,8 @@ From: ubuntu:20.04
     apt-get update &&          \
     apt-get install -y         \
     build-essential            \
-    cmake=3.16.3-1ubuntu1      \
+    ccache                     \
+    git                        \
     python3                    \
     python3-dev                \
     python3-pip                \
@@ -27,10 +30,32 @@ From: ubuntu:20.04
     libomp-dev                 \
     libgl1
 
-    pip3 install setuptools wheel numpy scipy pyvista
+    pip3 install setuptools wheel cmake numpy scipy pyvista scikit-build
 
     export PYTHONPATH=Python
-    python3 /setup.py install
+    python3 /setup.py bdist_wheel build_ext --build-temp=_skbuild -- -DBUILD_VF_CPU=ON -DBUILD_VF_DOUBLE_ACCURACY=ON
+
+    pip3 install $(find dist/*.whl)
+
+
+BootStrap: docker
+From: ubuntu:20.04
+Stage: runtime
+
+%files from build
+    Python Python
+    dist dist
+
+%post
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update &&          \
+    apt-get install -y         \
+    python3                    \
+    python3-pip                \
+    mpich                      \
+    libgl1
+
+    pip3 install $(find dist/*.whl)
 
 %environment
     export PYTHONPATH=/Python
diff --git a/Python/SlurmTests/poiseuille/rocket.yml b/Python/SlurmTests/poiseuille/rocket.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b186469a4d3fd4b8edfafa4fc3f6dcd64e311d70
--- /dev/null
+++ b/Python/SlurmTests/poiseuille/rocket.yml
@@ -0,0 +1,23 @@
+host: $REMOTE_HOST  # connection settings use the same variable names the CI job rules check for
+user: $REMOTE_USER
+private_keyfile: $PRIVATE_KEY
+
+copy:  # NOTE(review): presumably uploaded to the remote host before the job is submitted - confirm against hpc-rocket docs
+  - from: Python/SlurmTests/poiseuille/slurm.job
+    to: poiseuille_test/slurm.job
+    overwrite: true
+
+  - from: Containers/PoiseuilleTestContainer.sif
+    to: poiseuille_test/PoiseuilleTestContainer.sif
+    overwrite: true
+
+collect:  # NOTE(review): presumably fetched back from the remote host after the job - confirm against hpc-rocket docs
+  - from: poiseuille_test/POISEUILLE_TEST.out
+    to: POISEUILLE_TEST.out
+    overwrite: true
+
+#clean:
+#  - poiseuille_test/PoiseuilleTestContainer.sif
+
+sbatch: poiseuille_test/slurm.job  # same path as the slurm.job copy target above
+continue_if_job_fails: true  # NOTE(review): presumably lets the collect step run even when the SLURM job fails - confirm
diff --git a/Python/SlurmTests/poiseuille/settings.py b/Python/SlurmTests/poiseuille/settings.py
index 4b4a1e4e9cc7f6118a60c22a40c70b027e3ac4e2..a3cdc5dc8b627612c2d57a58db36c9fbaa72efac 100644
--- a/Python/SlurmTests/poiseuille/settings.py
+++ b/Python/SlurmTests/poiseuille/settings.py
@@ -1,25 +1,58 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file settings.py
+! \ingroup Poiseuille
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 import os
 from acousticscaling import OneDirectionalAcousticScaling
-from pyfluids.cpu.kernel import LBMKernel, KernelType
-from pyfluids.cpu.parameters import RuntimeParameters, GridParameters, PhysicalParameters
+from pyfluids import cpu
 
 
-grid_params = GridParameters()
+grid_params = cpu.parameters.GridParameters()
 grid_params.node_distance = 1
 grid_params.number_of_nodes_per_direction = [1, 1, 16]
 grid_params.blocks_per_direction = [1, 1, 4]
 grid_params.periodic_boundary_in_x1 = True
 grid_params.periodic_boundary_in_x2 = True
 
-physical_params = PhysicalParameters()
+physical_params = cpu.parameters.PhysicalParameters()
 physical_params.lattice_viscosity = 1e-4
 
-runtime_params = RuntimeParameters()
+runtime_params = cpu.parameters.RuntimeParameters()
 runtime_params.number_of_threads = int(os.environ["PYFLUIDS_NUM_THREADS"])
 runtime_params.number_of_timesteps = 4_000_000
 runtime_params.timestep_log_interval = 1_000_000
 
-kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity)
 kernel.use_forcing = True
 kernel.forcing_in_x1 = 5e-10
 
diff --git a/Python/SlurmTests/poiseuille/simulation_runner.py b/Python/SlurmTests/poiseuille/simulation_runner.py
index 03fb24be7ea1a6468ae25ec3aa40ab59962ef91e..d54a35e72b298562f8ccec82677089f3898eec9b 100644
--- a/Python/SlurmTests/poiseuille/simulation_runner.py
+++ b/Python/SlurmTests/poiseuille/simulation_runner.py
@@ -5,7 +5,7 @@ from poiseuille.simulation import run_simulation
 from pyfluids.cpu.writer import Writer, OutputFormat
 
 
-scale_level = int(os.environ["PYFLUIDS_SCALE_LEVEL"])
+scale_level = int(os.environ.get("PYFLUIDS_SCALE_LEVEL", 1))
 grid_params, physical_params, runtime_params, kernel = Scaling.configuration_for_scale_level(scale_level)
 
 writer = Writer()
diff --git a/Python/SlurmTests/poiseuille/slurm.job b/Python/SlurmTests/poiseuille/slurm.job
index 488fc9a42f261d69a8212cff389721fdfb9cbf6e..b4e4da271920479ade008b28d4d2e6ce6343c3d3 100644
--- a/Python/SlurmTests/poiseuille/slurm.job
+++ b/Python/SlurmTests/poiseuille/slurm.job
@@ -1,5 +1,6 @@
 #!/bin/bash
 #SBATCH -J PyFluidsTest
+#SBATCH -o poiseuille_test/POISEUILLE_TEST.out
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=1
 #SBATCH --cpus-per-task=20
@@ -9,6 +10,9 @@
 #SBATCH --partition=standard
 
 source $HOME/.bashrc
+module load singularity/3.9.9
+
+cd poiseuille_test
 
 echo "PyFluids Poiseuille Test Case"
 echo "Number of tasks: ${SLURM_NTASKS}"
diff --git a/Python/acousticscaling.py b/Python/acousticscaling.py
index a664b8e924d648b680562b9aef11bee87b3562b1..7e71fed9fdd9f86415261ef4e22797021581f60c 100644
--- a/Python/acousticscaling.py
+++ b/Python/acousticscaling.py
@@ -1,22 +1,55 @@
-from pyfluids.cpu.kernel import LBMKernel
-from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file acousticscaling.py
+! \ingroup tests
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
+from pyfluids import cpu
 
 
 class OneDirectionalAcousticScaling:
 
-    def __init__(self, grid_parameters: GridParameters,
-                 physical_parameters: PhysicalParameters,
-                 runtime_parameters: RuntimeParameters,
-                 kernel: LBMKernel):
+    def __init__(self, grid_parameters: cpu.parameters.GridParameters,
+                 physical_parameters: cpu.parameters.PhysicalParameters,
+                 runtime_parameters: cpu.parameters.RuntimeParameters,
+                 kernel: cpu.kernel.LBMKernel):
         self._grid_params = grid_parameters
         self._physical_params = physical_parameters
         self._runtime_params = runtime_parameters
         self._kernel = kernel
 
-    def configuration_for_scale_level(self, level: int = 1) -> tuple[GridParameters,
-                                                                PhysicalParameters,
-                                                                RuntimeParameters,
-                                                                LBMKernel]:
+    def configuration_for_scale_level(self, level: int = 1) -> tuple[cpu.parameters.GridParameters,
+                                                                cpu.parameters.PhysicalParameters,
+                                                                cpu.parameters.RuntimeParameters,
+                                                                cpu.kernel.LBMKernel]:
         if level < 0:
             raise ValueError("level must be >= 0")
 
@@ -27,8 +60,8 @@ class OneDirectionalAcousticScaling:
 
         return grid_params, physical_params, runtime_params, kernel
 
-    def clone_grid_params_for_level(self, level) -> GridParameters:
-        grid_params = GridParameters()
+    def clone_grid_params_for_level(self, level) -> cpu.parameters.GridParameters:
+        grid_params = cpu.parameters.GridParameters()
         grid_params.reference_direction_index = self._grid_params.reference_direction_index
         grid_params.periodic_boundary_in_x1 = self._grid_params.periodic_boundary_in_x1
         grid_params.periodic_boundary_in_x2 = self._grid_params.periodic_boundary_in_x2
@@ -51,7 +84,7 @@ class OneDirectionalAcousticScaling:
         return grid_params
 
     def clone_physical_parameters(self, level):
-        physical_params = PhysicalParameters()
+        physical_params = cpu.parameters.PhysicalParameters()
         physical_params.lattice_viscosity = self._physical_params.lattice_viscosity
 
         if level > 0:
@@ -60,7 +93,7 @@ class OneDirectionalAcousticScaling:
         return physical_params
 
     def clone_runtime_params_for_level(self, level):
-        runtime_params = RuntimeParameters()
+        runtime_params = cpu.parameters.RuntimeParameters()
         runtime_params.number_of_timesteps = self._runtime_params.number_of_timesteps
         runtime_params.number_of_threads = self._runtime_params.number_of_threads
         runtime_params.timestep_log_interval = self._runtime_params.timestep_log_interval
@@ -71,7 +104,7 @@ class OneDirectionalAcousticScaling:
         return runtime_params
 
     def clone_kernel_for_level(self, level):
-        kernel = LBMKernel(self._kernel.type)
+        kernel = cpu.kernel.LBMKernel(self._kernel.type)
         kernel.use_forcing = self._kernel.use_forcing
         kernel.forcing_in_x1 = self._kernel.forcing_in_x1
         kernel.forcing_in_x2 = self._kernel.forcing_in_x2
diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py
index 6e3c8608617df1267535984d53307dea9184c6ab..721af737ff6ef3340c3c2f6204aa6a7824cd1d2f 100644
--- a/Python/actuator_line/actuator_line.py
+++ b/Python/actuator_line/actuator_line.py
@@ -1,23 +1,48 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file actuator_line.py
+! \ingroup actuator_line
+! \author Henry Korb, Henrik Asmuth
+=======================================================================================
+"""
 #%%
 import numpy as np
 from pathlib import Path
 from mpi4py import MPI
-from pyfluids import basics, gpu, logger
+from pyfluids.bindings import basics, gpu, logger
 #%%
-reference_diameter = 126
-
-length = np.array([29,6,6])*reference_diameter
-viscosity = 1.56e-5
-velocity = 9
-mach = 0.1
-nodes_per_diameter = 32
-
-sim_name = "ActuatorLine"
-config_file = Path(__file__).parent/Path("config.txt")
+sim_name = "ABL"
+config_file = Path(__file__).parent/"configActuatorLine.txt"
 output_path = Path(__file__).parent/Path("output")
 output_path.mkdir(exist_ok=True)
-t_out = 100.
-t_end = 500.
+
 
 #%%
 logger.Logger.initialize_logger()
@@ -25,87 +50,175 @@ basics.logger.Logger.add_stdout()
 basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW)
 basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE)
 basics.logger.Logger.enable_printed_rank_numbers(True)
-# %%
-comm = gpu.Communicator.get_instance()
 #%%
 grid_factory = gpu.grid_generator.GridFactory.make()
 grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
+communicator = gpu.Communicator.get_instance()
 
-#%%
-dx = reference_diameter/nodes_per_diameter
-
-grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
-grid_builder.set_periodic_boundary_condition(False, False, False)
-grid_builder.build_grids(basics.LbmOrGks.LBM, False)
-#%%
 config = basics.ConfigurationFile()
 config.load(str(config_file))
+
+para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config)
+bc_factory = gpu.BoundaryConditionFactory()
+
 #%%
-para = gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid())
+turbine_diameter = config.get_float_value("turbineDiameter", 126)
+boundary_layer_height = config.get_float_value("boundaryLayerHeight", 1000)
+z0 = config.get_float_value("z0", 0.1)
+u_star = config.get_float_value("u_star", 0.4)
+
+kappa = config.get_float_value("vonKarmanConstant", 0.4) # von Karman constant
+
+viscosity = config.get_float_value("viscosity", 1.56e-5)
+
+velocity  = 0.5*u_star/kappa*np.log(boundary_layer_height/z0+1) #0.5 times max mean velocity at the top in m/s
+
+mach = config.get_float_value("Ma", 0.1)
+nodes_per_height = config.get_uint_value("nz", 64)
+
+
+turb_pos = np.array([3,3,3])*turbine_diameter
+epsilon = config.get_float_value("SmearingWidth", 5)
+density = config.get_float_value("Density", 1.225)
+level = 0
+n_blades = 3
+n_blade_nodes = config.get_int_value("NumberOfNodesPerAL", 32)
+
+read_precursor = config.get_bool_value("readPrecursor", False)
+
+if read_precursor:
+    nTReadPrecursor = config.get_int_value("nTimestepsReadPrecursor")
+    use_distributions = config.get_bool_value("useDistributions", False)
+    precursor_directory = config.get_string_value("precursorDirectory")
+
+# all in s
+t_start_out   = config.get_float_value("tStartOut")
+t_out        = config.get_float_value("tOut")
+t_end        = config.get_float_value("tEnd") # total time of simulation
 
+t_start_averaging     =  config.get_float_value("tStartAveraging")
+t_start_tmp_averaging  =  config.get_float_value("tStartTmpAveraging")
+t_averaging          =  config.get_float_value("tAveraging")
+t_start_out_probe      =  config.get_float_value("tStartOutProbe")
+t_out_probe           =  config.get_float_value("tOutProbe")
+
+#%%
+length = np.array([6,4,1])*boundary_layer_height
+dx = boundary_layer_height/nodes_per_height
 dt = dx * mach / (np.sqrt(3) * velocity)
-velocity_lb = velocity * dt / dx # LB units
-viscosity_lb = viscosity * dt / (dx * dx) # LB units
+velocity_ratio = dx/dt
+velocity_LB = velocity / velocity_ratio # LB units
+viscosity_LB = viscosity / (velocity_ratio * dx) # LB units
+pressure_gradient = u_star * u_star / boundary_layer_height
+pressure_gradient_LB = pressure_gradient * (dt*dt)/dx
+
+logger.vf_log_info(f"velocity  [dx/dt] = {velocity_LB}")
+logger.vf_log_info(f"dt   = {dt}")
+logger.vf_log_info(f"dx   = {dx}")
+logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}")
+logger.vf_log_info(f"u* /(dx/dt) = {u_star*dt/dx}")
+logger.vf_log_info(f"dpdx  = {pressure_gradient}")
+logger.vf_log_info(f"dpdx /(dx/dt^2) = {pressure_gradient_LB}")
+
 
 #%%
-para.set_devices([0])
 para.set_output_prefix(sim_name)
-para.set_output_path(str(output_path))
-para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix())
 para.set_print_files(True)
-para.set_max_level(1)
-#%%
-para.set_velocity(velocity_lb)
-para.set_viscosity(viscosity_lb)    
+
+para.set_forcing(pressure_gradient_LB, 0, 0)
+para.set_velocity_LB(velocity_LB)
+para.set_viscosity_LB(viscosity_LB)    
 para.set_velocity_ratio(dx/dt)
 para.set_viscosity_ratio(dx*dx/dt)
-para.set_main_kernel("TurbulentViscosityCumulantK17CompChim")
-para.set_use_AMD(True)
-para.set_SGS_constant(0.083)
+para.set_density_ratio(1.0)
 
-def init_func(coord_x, coord_y, coord_z):
-    return [0.0, velocity_lb, 0.0, 0.0]
+para.set_main_kernel("CumulantK17")
 
-para.set_initial_condition(init_func)
-para.set_t_out(int(t_out/dt))
-para.set_t_end(int(t_end/dt))
+para.set_timestep_start_out(int(t_start_out/dt))
+para.set_timestep_out(int(t_out/dt))
+para.set_timestep_end(int(t_end/dt))
 para.set_is_body_force(True)
-
 #%%
-grid_builder.set_velocity_boundary_condition(gpu.SideType.MX, velocity_lb, 0.0, 0.0)
-
-grid_builder.set_velocity_boundary_condition(gpu.SideType.MY, velocity_lb, 0.0, 0.0)
-grid_builder.set_velocity_boundary_condition(gpu.SideType.PY, velocity_lb, 0.0, 0.0)
-
-grid_builder.set_velocity_boundary_condition(gpu.SideType.MZ, velocity_lb, 0.0, 0.0)
-grid_builder.set_velocity_boundary_condition(gpu.SideType.PZ, velocity_lb, 0.0, 0.0)
+tm_factory = gpu.TurbulenceModelFactory(para)
+tm_factory.read_config_file(config)
+#%%
+grid_scaling_factory = gpu.GridScalingFactory()
+grid_scaling_factory.set_scaling_factory(gpu.GridScaling.ScaleCompressible)
 
-grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0.0)
+grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
+grid_builder.set_periodic_boundary_condition(not read_precursor, True, False)
+grid_builder.build_grids(basics.LbmOrGks.LBM, False)
 
+sampling_offset = 2
+if read_precursor:
+    precursor = gpu.create_file_collection(precursor_directory + "/precursor", gpu.FileType.VTK)
+    grid_builder.set_precursor_boundary_condition(gpu.SideType.MX, precursor, nTReadPrecursor, 0, 0, 0)
+
+grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0, 0, 1, sampling_offset, z0, dx)
+para.set_has_wall_model_monitor(True)
+grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0, 0, -1)
+
+if read_precursor:
+    grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0)
+bc_factory.set_stress_boundary_condition(gpu.StressBC.StressPressureBounceBack)
+bc_factory.set_slip_boundary_condition(gpu.SlipBC.SlipBounceBack) 
+bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective)
+if read_precursor:
+    bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor)
+para.set_outflow_pressure_correction_factor(0.0); 
 #%%
-cuda_memory_manager = gpu.CudaMemoryManager(para)
-grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm)
+# don't use python init functions, they are very slow! Just kept as an example.
+# Define lambda in bindings and set it here.
+# def init_func(coord_x, coord_y, coord_z):
+#     return [
+#         0.0, 
+#         (u_star/0.4 * np.log(np.maximum(coord_z,z0)/z0) + 2.0*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1))  * dt / dx, 
+#         2.0*np.sin(np.pi*16.*coord_x/length[0])*np.sin(np.pi*8.*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1.)  * dt / dx, 
+#         8.0*u_star/0.4*(np.sin(np.pi*8.0*coord_y/boundary_layer_height)*np.sin(np.pi*8.0*coord_z/boundary_layer_height)+np.sin(np.pi*8.0*coord_x/length[0]))/(np.square(length[2]/2.0-coord_z)+1.) * dt / dx]
+# para.set_initial_condition(init_func)
+para.set_initial_condition_perturbed_log_law(u_star, z0, length[0], length[2], boundary_layer_height, velocity_ratio)
+
 #%%
-turb_pos = np.array([3,3,3])*reference_diameter
-epsilon = 5
+turb_pos = np.array([3,3,3])*turbine_diameter
+epsilon = 1.5*dx
 density = 1.225
 level = 0
 n_blades = 3
 n_blade_nodes = 32
-alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, reference_diameter, level, dt, dx)
+omega = 1
+blade_radii = np.arange(n_blade_nodes, dtype=np.float32)/(0.5*turbine_diameter)
+alm = gpu.ActuatorFarm(n_blades, density, n_blade_nodes, epsilon, level, dt, dx, True)
+alm.add_turbine(turb_pos[0],turb_pos[1],turb_pos[2], turbine_diameter, omega, 0, 0, blade_radii)
 para.add_actuator(alm)
 #%%
-point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 1, 500, 100)
-point_probe.add_probe_points_from_list(np.array([1,2,5])*reference_diameter, np.array([3,3,3])*reference_diameter, np.array([3,3,3])*reference_diameter)
-point_probe.add_statistic(gpu.probes.Statistic.Means)
-
-para.add_probe(point_probe)
-
-plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 1, 500, 100)
-plane_probe.set_probe_plane(5*reference_diameter, 0, 0, dx, length[1], length[2])
-para.add_probe(plane_probe)
+planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z')
+planar_average_probe.add_all_available_statistics()
+planar_average_probe.set_file_name_to_n_out()
+para.add_probe(planar_average_probe)
 #%%
-sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator)
+wall_model_probe = gpu.probes.WallModelProbe("wallModelProbe", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt))
+wall_model_probe.add_all_available_statistics()
+wall_model_probe.set_file_name_to_n_out()
+wall_model_probe.set_force_output_to_stress(True)
+if para.get_is_body_force():
+    wall_model_probe.set_evaluate_pressure_gradient(True)
+para.add_probe(wall_model_probe)
+
+plane_locs = [100,]
+if read_precursor: plane_locs.extend([1000, 1500, 2000, 2500, 0])
+
+for n_probe, probe_pos in enumerate(plane_locs):
+    plane_probe = gpu.probes.PlaneProbe(f"planeProbe_{n_probe+1}", para.get_output_path(), int(t_start_averaging/dt), 10, int(t_start_out_probe/dt), int(t_out_probe/dt))
+    plane_probe.set_probe_plane(probe_pos, 0, 0, dx, length[1], length[2])
+    plane_probe.add_all_available_statistics()
+    para.add_probe(plane_probe)
+#%%
+cuda_memory_manager = gpu.CudaMemoryManager(para)
+grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, communicator)
+#%%
+#%%
+sim = gpu.Simulation(para, cuda_memory_manager, communicator, grid_generator, bc_factory, tm_factory, grid_scaling_factory)
 #%%
 sim.run()
-MPI.Finalize()
\ No newline at end of file
+MPI.Finalize()
+
diff --git a/Python/actuator_line/config.txt b/Python/actuator_line/config.txt
deleted file mode 100644
index e4c778c4cc048f54c0a32310e6bf4a7343a263fa..0000000000000000000000000000000000000000
--- a/Python/actuator_line/config.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Path = .
-GridPath = .
diff --git a/Python/actuator_line/configActuatorLine.txt b/Python/actuator_line/configActuatorLine.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c45d170f039274ab355f3fe1dc044536f1f29e6f
--- /dev/null
+++ b/Python/actuator_line/configActuatorLine.txt
@@ -0,0 +1,39 @@
+##################################################
+#information for writing
+##################################################
+Path = .
+##################################################
+#information for reading
+##################################################
+GridPath = .
+##################################################
+Devices = 0 
+##################################################
+tStartOut           = 0
+tOut                = 100000
+tEnd                = 300000
+##################################################
+tStartAveraging     = 0
+tStartTmpAveraging  = 100000
+tAveraging          = 200
+tStartOutProbe      = 0
+tOutProbe           = 1000 
+##################################################
+Ma = 0.1
+nz = 96 
+
+bodyForce = true
+SGSconstant = 0.333
+TurbulenceModel = QR
+
+QuadricLimiterP = 100000.0
+QuadricLimiterM = 100000.0
+QuadricLimiterD = 100000.0
+
+##################################################
+readPrecursor = false
+nTimestepsReadPrecursor = 1
+precursorFile = precursor/Precursor
+
+##################################################
+turbineDiameter = 126.0
diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py
index 1c01f50946b49bc0ddab7e50065a24aab4ae869f..6f6c64bc072d3afbb8aa5febbec209c26af2deee 100644
--- a/Python/boundary_layer/boundary_layer.py
+++ b/Python/boundary_layer/boundary_layer.py
@@ -1,37 +1,48 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file boundary_layer.py
+! \ingroup boundary_layer
+! \author Henry Korb, Henrik Asmuth
+=======================================================================================
+"""
 #%%
 import numpy as np
 from pathlib import Path
 from mpi4py import MPI
 from pyfluids import basics, gpu, logger
 #%%
-reference_height = 1000 # boundary layer height in m
-
-length = np.array([6,4,1])*reference_height
-viscosity = 1.56e-5
-mach = 0.1
-nodes_per_height = 32
-
-z_0 = 0.1
-u_star = 0.4
-kappa = 0.4
-
-velocity = 0.5*u_star/kappa*np.log(length[2]/z_0+1)
-flow_through_time = length[0]/velocity
-use_AMD = True
-
-
-sim_name = "BoundaryLayer"
-config_file = Path(__file__).parent/Path("config.txt")
+sim_name = "ABL"
+config_file = Path(__file__).parent/"configBoundaryLayer.txt"
 output_path = Path(__file__).parent/Path("output")
 output_path.mkdir(exist_ok=True)
-t_out = 1000.
-t_end = 5000.
 
-t_start_averaging = 0
-t_start_tmp_averaging =  100_000
-t_averaging = 200
-t_start_out_probe = 0
-t_out_probe = 1000
 
 #%%
 logger.Logger.initialize_logger()
@@ -39,95 +50,161 @@ basics.logger.Logger.add_stdout()
 basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW)
 basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE)
 basics.logger.Logger.enable_printed_rank_numbers(True)
-# %%
-comm = gpu.Communicator.get_instance()
 #%%
 grid_factory = gpu.grid_generator.GridFactory.make()
 grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
+communicator = gpu.Communicator.get_instance()
+
+config = basics.ConfigurationFile()
+config.load(str(config_file))
+
+para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config)
+bc_factory = gpu.BoundaryConditionFactory()
 
 #%%
-dx = reference_height/nodes_per_height
-dt = dx * mach / (np.sqrt(3) * velocity)
-velocity_lb = velocity * dt / dx # LB units
-viscosity_lb = viscosity * dt / (dx * dx) # LB units
+boundary_layer_height = config.get_float_value("boundaryLayerHeight", 1000)
+z0 = config.get_float_value("z0", 0.1)
+u_star = config.get_float_value("u_star", 0.4)
 
-pressure_gradient = u_star**2 / reference_height
-pressure_gradient_lb = pressure_gradient * dt**2 / dx
+kappa = config.get_float_value("vonKarmanConstant", 0.4) # von Karman constant
 
-logger.vf_log_info(f"velocity    = {velocity_lb:1.6} dx/dt")
-logger.vf_log_info(f"dt          = {dt:1.6}")
-logger.vf_log_info(f"dx          = {dx:1.6}")
-logger.vf_log_info(f"u*          = {u_star:1.6}")
-logger.vf_log_info(f"dpdx        = {pressure_gradient:1.6}")
-logger.vf_log_info(f"dpdx        = {pressure_gradient_lb:1.6} dx/dt^2")
-logger.vf_log_info(f"viscosity   = {viscosity_lb:1.6} dx^2/dt")
+viscosity = config.get_float_value("viscosity", 1.56e-5)
 
+velocity  = 0.5*u_star/kappa*np.log(boundary_layer_height/z0+1) #0.5 times max mean velocity at the top in m/s
 
-#%%
-config = basics.ConfigurationFile()
-config.load(str(config_file))
-#%%
-para = gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid())
+mach = config.get_float_value("Ma", 0.1)
+nodes_per_height = config.get_uint_value("nz", 64)
+
+
+
+write_precursor = config.get_bool_value("writePrecursor", False)  # key must match "writePrecursor" in configBoundaryLayer.txt
+read_precursor = config.get_bool_value("readPrecursor", False)
+
+if write_precursor:
+    nTWritePrecursor      = config.get_int_value("nTimestepsWritePrecursor")
+    t_start_precursor      = config.get_float_value("tStartPrecursor")
+    pos_x_precursor        = config.get_float_value("posXPrecursor")
 
+if read_precursor:
+    nTReadPrecursor = config.get_int_value("nTimestepsReadPrecursor")
 
+if write_precursor or read_precursor:
+    use_distributions = config.get_bool_value("useDistributions", False)
+    precursor_directory = config.get_string_value("precursorDirectory")
+
+# all in s
+t_start_out   = config.get_float_value("tStartOut")
+t_out        = config.get_float_value("tOut")
+t_end        = config.get_float_value("tEnd") # total time of simulation
+
+t_start_averaging     =  config.get_float_value("tStartAveraging")
+t_start_tmp_averaging  =  config.get_float_value("tStartTmpAveraging")
+t_averaging          =  config.get_float_value("tAveraging")
+t_start_out_probe      =  config.get_float_value("tStartOutProbe")
+t_out_probe           =  config.get_float_value("tOutProbe")
+
+#%%
+length = np.array([6,4,1])*boundary_layer_height
+dx = boundary_layer_height/nodes_per_height
+dt = dx * mach / (np.sqrt(3) * velocity)
+velocity_LB = velocity * dt / dx # LB units
+viscosity_LB = viscosity * dt / (dx * dx) # LB units
+pressure_gradient = u_star * u_star / boundary_layer_height
+pressure_gradient_LB = pressure_gradient * (dt*dt)/dx
+
+logger.vf_log_info(f"velocity  [dx/dt] = {velocity_LB}")
+logger.vf_log_info(f"dt   = {dt}")
+logger.vf_log_info(f"dx   = {dx}")
+logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}")
+logger.vf_log_info(f"u* /(dx/dt) = {u_star*dt/dx}")
+logger.vf_log_info(f"dpdx  = {pressure_gradient}")
+logger.vf_log_info(f"dpdx /(dx/dt^2) = {pressure_gradient_LB}")
+    
+#%%
 
 #%%
-para.set_devices([0])
 para.set_output_prefix(sim_name)
-para.set_output_path(str(output_path))
-para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix())
 para.set_print_files(True)
-para.set_max_level(1)
-#%%
-para.set_velocity(velocity_lb)
-para.set_viscosity(viscosity_lb)    
+
+para.set_forcing(pressure_gradient_LB, 0, 0)
+para.set_velocity_LB(velocity_LB)
+para.set_viscosity_LB(viscosity_LB)    
 para.set_velocity_ratio(dx/dt)
 para.set_viscosity_ratio(dx*dx/dt)
-para.set_use_AMD(use_AMD)
+para.set_density_ratio(1.0)
+
+para.set_main_kernel("CumulantK17")
 
-para.set_main_kernel("TurbulentViscosityCumulantK17CompChim" if para.get_use_AMD() else "CummulantK17CompChim")
+para.set_timestep_start_out(int(t_start_out/dt))
+para.set_timestep_out(int(t_out/dt))
+para.set_timestep_end(int(t_end/dt))
+para.set_is_body_force(config.get_bool_value("bodyForce"))
+#%%
+tm_factory = gpu.TurbulenceModelFactory(para)
+tm_factory.read_config_file(config)
+#%%
+grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
+grid_builder.set_periodic_boundary_condition(not read_precursor, True, False)
+grid_builder.build_grids(basics.LbmOrGks.LBM, False)
 
-para.set_SGS_constant(0.083)
+sampling_offset = 2
+if read_precursor:
+    precursor = gpu.create_file_collection(precursor_directory + "/precursor", gpu.FileType.VTK)
+    grid_builder.set_precursor_boundary_condition(gpu.SideType.MX, precursor, nTReadPrecursor, 0, 0, 0)
 
+grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0, 0, 1, sampling_offset, z0/dx)
+para.set_has_wall_model_monitor(True)
+grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0, 0, -1)
+
+if read_precursor:
+    grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0)
+bc_factory.set_stress_boundary_condition(gpu.StressBC.StressPressureBounceBack)
+bc_factory.set_slip_boundary_condition(gpu.SlipBC.SlipBounceBack) 
+bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective)
+if read_precursor: bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor)  # use_distributions is only defined when a precursor is configured
+para.set_outflow_pressure_correction_factor(0.0); 
+#%%
 def init_func(coord_x, coord_y, coord_z):
     return [
         0.0, 
-        (u_star/kappa*np.log(max(coord_z/z_0,0)+1) + 2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2]))/((coord_z/reference_height)**2+0.1)*dt/dx, 
-        2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2])/((coord_z/reference_height)**2+0.1)*dt/dx, 
-        8*u_star/kappa*(np.sin(np.pi*8*coord_y/reference_height)*np.sin(np.pi*8*coord_z/reference_height)+np.sin(np.pi*8*coord_x/length[0]))/((length[2]/2-coord_z)**2+0.1)*dt/dx
-        ]
-
+        (u_star/0.4 * np.log(np.maximum(coord_z,z0)/z0) + 2.0*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1))  * dt / dx, 
+        2.0*np.sin(np.pi*16.*coord_x/length[0])*np.sin(np.pi*8.*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1.)  * dt / dx, 
+        8.0*u_star/0.4*(np.sin(np.pi*8.0*coord_y/boundary_layer_height)*np.sin(np.pi*8.0*coord_z/boundary_layer_height)+np.sin(np.pi*8.0*coord_x/length[0]))/(np.square(length[2]/2.0-coord_z)+1.) * dt / dx]
 para.set_initial_condition(init_func)
-para.set_t_out(int(t_out/dt))
-para.set_t_end(int(t_end/dt))
-para.set_is_body_force(True)
-para.set_has_wall_model_monitor(True)
 
-
-grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
-grid_builder.set_periodic_boundary_condition(True, True, False)
-grid_builder.build_grids(basics.LbmOrGks.LBM, False)
 #%%
-sampling_offset = 2
-grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0.0, 0.0, 1.0, sampling_offset, z_0/dx)
-grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0.0, 0.0, 0.0)
+planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z')
+planar_average_probe.add_all_available_statistics()
+planar_average_probe.set_file_name_to_n_out()
+para.add_probe(planar_average_probe)
+#%%
+wall_model_probe = gpu.probes.WallModelProbe("wallModelProbe", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt))
+wall_model_probe.add_all_available_statistics()
+wall_model_probe.set_file_name_to_n_out()
+wall_model_probe.set_force_output_to_stress(True)
+if para.get_is_body_force():
+    wall_model_probe.set_evaluate_pressure_gradient(True)
+para.add_probe(wall_model_probe)
+
+plane_locs = [100,]
+if read_precursor: plane_locs.extend([1000, 1500, 2000, 2500, 0])
+
+for n_probe, probe_pos in enumerate(plane_locs):
+    plane_probe = gpu.probes.PlaneProbe(f"planeProbe_{n_probe+1}", para.get_output_path(), int(t_start_averaging/dt), 10, int(t_start_out_probe/dt), int(t_out_probe/dt))
+    plane_probe.set_probe_plane(probe_pos, 0, 0, dx, length[1], length[2])
+    plane_probe.add_all_available_statistics()
+    para.add_probe(plane_probe)
+
+if write_precursor:
+    precursor_writer = gpu.PrecursorWriter("precursor", para.get_output_path() + precursor_directory, pos_x_precursor, 0,length[1], 0, length[2], t_start_precursor/dt, nTWritePrecursor, gpu.OutputVariable.Distributions if use_distributions else gpu.OutputVariable.Velocities)
+    para.add_probe(precursor_writer)
 
 #%%
 cuda_memory_manager = gpu.CudaMemoryManager(para)
-grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm)
-
+grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, communicator)
 #%%
-wall_probe = gpu.probes.WallModelProbe("wallModelProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt))
-wall_probe.add_all_available_statistics()
-wall_probe.set_file_name_to_n_out()
-wall_probe.set_force_output_to_stress(True)
-if para.get_is_body_force():
-    wall_probe.set_evaluate_pressure_gradient(True)
-planar_probe = gpu.probes.PlanarAverageProbe("planarAverageProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt), int(t_start_out_probe/dt), int(t_out_probe/dt), "z")
-para.add_probe(wall_probe)
-
 #%%
-sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator)
+sim = gpu.Simulation(para, cuda_memory_manager, communicator, grid_generator, bc_factory, tm_factory)
 #%%
 sim.run()
 MPI.Finalize()
\ No newline at end of file
diff --git a/Python/boundary_layer/config.txt b/Python/boundary_layer/config.txt
deleted file mode 100644
index e4c778c4cc048f54c0a32310e6bf4a7343a263fa..0000000000000000000000000000000000000000
--- a/Python/boundary_layer/config.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Path = .
-GridPath = .
diff --git a/Python/boundary_layer/configBoundaryLayer.txt b/Python/boundary_layer/configBoundaryLayer.txt
new file mode 100644
index 0000000000000000000000000000000000000000..83e7861a5fb85ea800d187699f1c6c1409422f0a
--- /dev/null
+++ b/Python/boundary_layer/configBoundaryLayer.txt
@@ -0,0 +1,42 @@
+##################################################
+#information for writing
+##################################################
+Path = .
+##################################################
+#information for reading
+##################################################
+GridPath = .
+##################################################
+Devices = 0 
+##################################################
+tStartOut           = 0
+tOut                = 100000
+tEnd                = 300000
+##################################################
+tStartAveraging     = 0
+tStartTmpAveraging  = 100000
+tAveraging          = 200
+tStartOutProbe      = 0
+tOutProbe           = 1000 
+##################################################
+Ma = 0.1
+nz = 96 
+
+bodyForce = true
+UseAMD = true
+SGSconstant = 0.2
+QuadricLimiterP = 100000.0
+QuadricLimiterM = 100000.0
+QuadricLimiterD = 100000.0
+
+##################################################
+readPrecursor = false
+nTimestepsReadPrecursor = 10
+precursorFile = precursor/Precursor
+
+##################################################
+writePrecursor = false
+nTimestepsWritePrecursor = 10
+
+tStartPrecursor = 100
+posXPrecursor = 3000
\ No newline at end of file
diff --git a/Python/cubeflow/simulation.py b/Python/cubeflow/simulation.py
index 9e77e8d747c072188d8d81150afa8e2ccb76a792..deb0411963aec65522af45cc48d7367f103232c6 100644
--- a/Python/cubeflow/simulation.py
+++ b/Python/cubeflow/simulation.py
@@ -1,13 +1,42 @@
-from pyfluids.cpu import Simulation
-from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition, DensityBoundaryCondition
-from pyfluids.cpu.geometry import GbCuboid3D
-from pyfluids.cpu.kernel import LBMKernel, KernelType
-from pyfluids.cpu.parameters import PhysicalParameters, RuntimeParameters, GridParameters
-from pyfluids.cpu.writer import Writer, OutputFormat
-from pymuparser import Parser
-
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file simulation.py
+! \ingroup cubeflow
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 import os
 
+from pyfluids import cpu
+from pymuparser import Parser
+
 
 def get_max_length(number_of_nodes_per_direction, delta_x):
     return (number_of_nodes_per_direction[0] * delta_x,
@@ -15,10 +44,10 @@ def get_max_length(number_of_nodes_per_direction, delta_x):
             number_of_nodes_per_direction[2] * delta_x)
 
 
-physical_params = PhysicalParameters()
+physical_params = cpu.parameters.PhysicalParameters()
 physical_params.lattice_viscosity = 0.005
 
-grid_params = GridParameters()
+grid_params = cpu.parameters.GridParameters()
 grid_params.number_of_nodes_per_direction = [200, 120, 120]
 grid_params.blocks_per_direction = [2, 2, 2]
 grid_params.node_distance = 0.125
@@ -26,7 +55,7 @@ grid_params.periodic_boundary_in_x1 = False
 grid_params.periodic_boundary_in_x2 = True
 grid_params.periodic_boundary_in_x3 = True
 
-runtime_params = RuntimeParameters()
+runtime_params = cpu.parameters.RuntimeParameters()
 runtime_params.timestep_log_interval = 1000
 runtime_params.number_of_timesteps = 50000
 runtime_params.number_of_threads = int(os.environ.get("OMP_NUM_THREADS", 4))
@@ -39,46 +68,46 @@ def run_simulation(physical_parameters=physical_params, grid_parameters=grid_par
     min_x, min_y, min_z = 0, 0, 0
     max_x, max_y, max_z = get_max_length(grid_parameters.number_of_nodes_per_direction, grid_parameters.node_distance)
 
-    bottom_wall = GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z, max_x + wall_thickness,
+    bottom_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z, max_x + wall_thickness,
                              max_y + wall_thickness, min_z - wall_thickness)
 
-    top_wall = GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, max_z, max_x + wall_thickness,
+    top_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, max_z, max_x + wall_thickness,
                           max_y + wall_thickness,
                           max_z + wall_thickness)
 
-    left_wall = GbCuboid3D(min_x - wall_thickness, min_y, min_z - wall_thickness, max_x + wall_thickness,
+    left_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y, min_z - wall_thickness, max_x + wall_thickness,
                            min_y - wall_thickness,
                            max_z + wall_thickness)
 
-    right_wall = GbCuboid3D(min_x - wall_thickness, max_y, min_z - wall_thickness, max_x + wall_thickness,
+    right_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, max_y, min_z - wall_thickness, max_x + wall_thickness,
                             max_y + wall_thickness, max_z + wall_thickness)
 
-    obstacle = GbCuboid3D(7, 7, 7, 8, 8, 8)
+    obstacle = cpu.geometry.GbCuboid3D(7, 7, 7, 8, 8, 8)
 
-    velocity_boundary = GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z - wall_thickness, min_x,
+    velocity_boundary = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z - wall_thickness, min_x,
                                    max_y + wall_thickness, max_z + wall_thickness)
 
-    outflow_boundary = GbCuboid3D(max_x, min_y - wall_thickness, min_z - wall_thickness, max_x + wall_thickness,
+    outflow_boundary = cpu.geometry.GbCuboid3D(max_x, min_y - wall_thickness, min_z - wall_thickness, max_x + wall_thickness,
                                   max_y + wall_thickness, max_z + wall_thickness)
 
-    no_slip_bc = NoSlipBoundaryCondition()
+    no_slip_bc = cpu.boundaryconditions.NoSlipBoundaryCondition()
 
-    outflow_bc = DensityBoundaryCondition()
+    outflow_bc = cpu.boundaryconditions.DensityBoundaryCondition()
 
     velocity_function = Parser()
     velocity_function.define_constant("u", 0.07)
     velocity_function.expression = "u"
-    velocity_bc = VelocityBoundaryCondition(True, False, False, velocity_function, 0, -10)
+    velocity_bc = cpu.boundaryconditions.VelocityBoundaryCondition(True, False, False, velocity_function, 0, -10)
 
-    kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+    kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity)
     # kernel.use_forcing = True
     # kernel.forcing_in_x1 = 3e-6
 
-    writer = Writer()
+    writer = cpu.writer.Writer()
     writer.output_path = "./output"
-    writer.output_format = OutputFormat.BINARY
+    writer.output_format = cpu.writer.OutputFormat.BINARY
 
-    simulation = Simulation()
+    simulation = cpu.Simulation()
     simulation.set_writer(writer)
 
     simulation.set_physical_parameters(physical_parameters)
diff --git a/Python/liddrivencavity/simulation.py b/Python/liddrivencavity/simulation.py
index 155fad2f6f8aade0368c8a7006b88f7985f8822c..3c247b87a102e3c5a720f20748acc9f9f50bb178 100644
--- a/Python/liddrivencavity/simulation.py
+++ b/Python/liddrivencavity/simulation.py
@@ -1,32 +1,61 @@
-from pyfluids.cpu import Simulation
-from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition
-from pyfluids.cpu.geometry import GbCuboid3D
-from pyfluids.cpu.kernel import LBMKernel, KernelType
-from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
-from pyfluids.cpu.writer import Writer, OutputFormat
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file simulation.py
+! \ingroup liddrivencavity
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
+from pyfluids import cpu
 from pymuparser import Parser
 
-runtime_params = RuntimeParameters()
+runtime_params = cpu.parameters.RuntimeParameters()
 runtime_params.number_of_threads = 4
 runtime_params.number_of_timesteps = 10000
 runtime_params.timestep_log_interval = 1000
 
-physical_params = PhysicalParameters()
+physical_params = cpu.parameters.PhysicalParameters()
 physical_params.lattice_viscosity = 0.005
 
-grid_params = GridParameters()
+grid_params = cpu.parameters.GridParameters()
 grid_params.number_of_nodes_per_direction = [64, 64, 64]
 grid_params.blocks_per_direction = [2, 2, 2]
 grid_params.node_distance = 1 / 10
 
 
 def run_simulation(physical_params=physical_params, grid_params=grid_params, runtime_params=runtime_params):
-    simulation = Simulation()
-    kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+    simulation = cpu.Simulation()
+    kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity)
 
-    writer = Writer()
+    writer = cpu.writer.Writer()
     writer.output_path = "./output"
-    writer.output_format = OutputFormat.BINARY
+    writer.output_format = cpu.writer.OutputFormat.BINARY
 
     simulation.set_grid_parameters(grid_params)
     simulation.set_physical_parameters(physical_params)
@@ -34,12 +63,12 @@ def run_simulation(physical_params=physical_params, grid_params=grid_params, run
     simulation.set_kernel_config(kernel)
     simulation.set_writer(writer)
 
-    no_slip_bc_adapter = NoSlipBoundaryCondition()
+    no_slip_bc_adapter = cpu.boundaryconditions.NoSlipBoundaryCondition()
 
     fct = Parser()
     fct.expression = "u"
     fct.define_constant("u", 0.005)
-    velocity_bc_adapter = VelocityBoundaryCondition(True, True, False, fct, 0, -10.0)
+    velocity_bc_adapter = cpu.boundaryconditions.VelocityBoundaryCondition(True, True, False, fct, 0, -10.0)
 
     g_min_x1, g_min_x2, g_min_x3 = 0, 0, 0
     g_max_x1 = grid_params.number_of_nodes_per_direction[0] * grid_params.node_distance
@@ -48,12 +77,12 @@ def run_simulation(physical_params=physical_params, grid_params=grid_params, run
 
     dx = grid_params.node_distance
 
-    wall_x_min = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_min_x1, g_max_x2 + dx, g_max_x3)
-    wall_x_max = GbCuboid3D(g_max_x1, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_max_x3)
-    wall_y_min = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_min_x2, g_max_x3)
-    wall_y_max = GbCuboid3D(g_min_x1 - dx, g_max_x2, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_max_x3)
-    wall_z_min = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_min_x3)
-    wall_z_max = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_max_x3, g_max_x1 + dx, g_max_x2 + dx, g_max_x3 + dx)
+    wall_x_min = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_min_x1, g_max_x2 + dx, g_max_x3)
+    wall_x_max = cpu.geometry.GbCuboid3D(g_max_x1, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_max_x3)
+    wall_y_min = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_min_x2, g_max_x3)
+    wall_y_max = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_max_x2, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_max_x3)
+    wall_z_min = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_min_x3)
+    wall_z_max = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_max_x3, g_max_x1 + dx, g_max_x2 + dx, g_max_x3 + dx)
 
     simulation.add_object(wall_x_min, no_slip_bc_adapter, 1, "/geo/wallXmin")
     simulation.add_object(wall_x_max, no_slip_bc_adapter, 1, "/geo/wallXmax")
diff --git a/Python/poiseuille/poiseuille_hpc.py b/Python/poiseuille/poiseuille_hpc.py
index f5f5a1387c9fe234abae0c6f979cc7d5b283d1a4..b108f34445a71a686c4e22f685e26e10204113b3 100644
--- a/Python/poiseuille/poiseuille_hpc.py
+++ b/Python/poiseuille/poiseuille_hpc.py
@@ -1,15 +1,49 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file poiseuille_hpc.py
+! \ingroup poiseuille
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 from poiseuille.simulation import run_simulation
-from pyfluids.cpu.parameters import *
+from pyfluids import cpu  # parameter classes are accessed as cpu.parameters.<Class>
 
-grid_parameters = GridParameters()
+grid_parameters = cpu.parameters.GridParameters()
 grid_parameters.number_of_nodes_per_direction = [64, 64, 512]
 grid_parameters.node_distance = 1
 grid_parameters.blocks_per_direction = [1, 2, 2]
 
-physical_parameters = PhysicalParameters()
+physical_parameters = cpu.parameters.PhysicalParameters()
 physical_parameters.lattice_viscosity = 0.0005
 
-runtime_parameters = RuntimeParameters()
+runtime_parameters = cpu.parameters.RuntimeParameters()
 runtime_parameters.number_of_threads = 4
 runtime_parameters.number_of_timesteps = 1000
 runtime_parameters.timestep_log_interval = 100
diff --git a/Python/poiseuille/simulation.py b/Python/poiseuille/simulation.py
index d107801fa84cfe16d1d7e91d31dc3ff4b8671f02..a6f12e59fbd0a0ccad9a4db9ccde69b828cf90bf 100644
--- a/Python/poiseuille/simulation.py
+++ b/Python/poiseuille/simulation.py
@@ -1,35 +1,65 @@
-from pyfluids.cpu import Simulation
-from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition
-from pyfluids.cpu.geometry import GbCuboid3D, State
-from pyfluids.cpu.kernel import LBMKernel, KernelType
-from pyfluids.cpu.parameters import RuntimeParameters, GridParameters, PhysicalParameters
-from pyfluids.cpu.writer import Writer, OutputFormat
-
-default_grid_params = GridParameters()
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file simulation.py
+! \ingroup poiseuille
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
+from pyfluids import cpu
+
+
+default_grid_params = cpu.parameters.GridParameters()
 default_grid_params.node_distance = 10 / 32
 default_grid_params.number_of_nodes_per_direction = [8, 8, 32]
 default_grid_params.blocks_per_direction = [1, 1, 4]
 default_grid_params.periodic_boundary_in_x1 = True
 default_grid_params.periodic_boundary_in_x2 = True
 
-default_physical_params = PhysicalParameters()
+default_physical_params = cpu.parameters.PhysicalParameters()
 default_physical_params.lattice_viscosity = 0.005
 
-default_runtime_params = RuntimeParameters()
+default_runtime_params = cpu.parameters.RuntimeParameters()
 default_runtime_params.number_of_threads = 4
 default_runtime_params.number_of_timesteps = 10000
 default_runtime_params.timestep_log_interval = 1000
 
-default_kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+default_kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity)
 default_kernel.use_forcing = True
 default_kernel.forcing_in_x1 = 1e-8
 
-default_writer = Writer()
+default_writer = cpu.writer.Writer()
 default_writer.output_path = "./output"
-default_writer.output_format = OutputFormat.BINARY
+default_writer.output_format = cpu.writer.OutputFormat.BINARY
 
 
-default_kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+default_kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity)
 default_kernel.use_forcing = True
 default_kernel.forcing_in_x1 = 1e-8
 
@@ -39,7 +69,7 @@ def run_simulation(physical_params=default_physical_params,
                    runtime_params=default_runtime_params,
                    kernel=default_kernel,
                    writer=default_writer):
-    simulation = Simulation()
+    simulation = cpu.Simulation()
 
     simulation.set_kernel_config(kernel)
     simulation.set_physical_parameters(physical_params)
@@ -47,11 +77,11 @@ def run_simulation(physical_params=default_physical_params,
     simulation.set_runtime_parameters(runtime_params)
     simulation.set_writer(writer)
 
-    no_slip_bc = NoSlipBoundaryCondition()
+    no_slip_bc = cpu.boundaryconditions.NoSlipBoundaryCondition()
 
     block_thickness = 3 * grid_params.node_distance
     simulation.add_object(
-        GbCuboid3D(
+        cpu.geometry.GbCuboid3D(
             grid_params.bounding_box.min_x1 - block_thickness,
             grid_params.bounding_box.min_x2 - block_thickness,
             grid_params.bounding_box.min_x3 - block_thickness,
@@ -59,10 +89,10 @@ def run_simulation(physical_params=default_physical_params,
             grid_params.bounding_box.max_x2 + block_thickness,
             grid_params.bounding_box.min_x3),
         no_slip_bc,
-        State.SOLID, "/geo/addWallZMin")
+        cpu.geometry.State.SOLID, "/geo/addWallZMin")
 
     simulation.add_object(
-        GbCuboid3D(
+        cpu.geometry.GbCuboid3D(
             grid_params.bounding_box.min_x1 - block_thickness,
             grid_params.bounding_box.min_x2 - block_thickness,
             grid_params.bounding_box.max_x3,
@@ -70,7 +100,7 @@ def run_simulation(physical_params=default_physical_params,
             grid_params.bounding_box.max_x2 + block_thickness,
             grid_params.bounding_box.max_x3 + block_thickness),
         no_slip_bc,
-        State.SOLID, "/geo/addWallZMax")
+        cpu.geometry.State.SOLID, "/geo/addWallZMax")
 
     simulation.run_simulation()
 
diff --git a/Python/poiseuille/test_poiseuille_l2.py b/Python/poiseuille/test_poiseuille_l2.py
index 93aa2600d5260dea7e72f3aa98db7334fe5285c6..818cba40e115945c60e4fa2ac96b3b6b5ab0bba8 100644
--- a/Python/poiseuille/test_poiseuille_l2.py
+++ b/Python/poiseuille/test_poiseuille_l2.py
@@ -1,3 +1,37 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file test_poiseuille_l2.py
+! \ingroup poiseuille
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 import os
 import shutil
 import unittest
@@ -5,8 +39,7 @@ import unittest
 import matplotlib.pyplot as plt
 import numpy as np
 import pyvista as pv
-from pyfluids.cpu.kernel import LBMKernel, KernelType
-from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+from pyfluids import cpu
 from scipy import stats
 
 from errors import normalized_l2_error
@@ -33,13 +66,13 @@ class TestPoiseuilleFlow(unittest.TestCase):
         self.skipTest("This test is not implemented correctly yet")
         plt.ion()
 
-        physical_params = PhysicalParameters()
+        physical_params = cpu.parameters.PhysicalParameters()
 
-        runtime_params = RuntimeParameters()
+        runtime_params = cpu.parameters.RuntimeParameters()
         runtime_params.number_of_threads = os.cpu_count()
         runtime_params.timestep_log_interval = 10000
 
-        kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+        kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity)
         kernel.use_forcing = True
 
         normalized_l2_errors = []
@@ -140,7 +173,7 @@ def get_heights_from_indices(mesh, indices):
 
 
 def create_grid_params_with_nodes_in_column(nodes_in_column, delta_x):
-    grid_params = GridParameters()
+    grid_params = cpu.parameters.GridParameters()
     grid_params.node_distance = delta_x
     grid_params.number_of_nodes_per_direction = [1, 1, nodes_in_column]
     grid_params.blocks_per_direction = [1, 1, 8]
diff --git a/Python/tests/test_acousticscaling.py b/Python/tests/test_acousticscaling.py
index 6413123a80db8c5882fcf1dbe6f72a1f5438736c..02454b935e3a147e045f45c273392646aeca6b8c 100644
--- a/Python/tests/test_acousticscaling.py
+++ b/Python/tests/test_acousticscaling.py
@@ -1,9 +1,41 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file test_acousticscaling.py
+! \ingroup tests
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 import unittest
 from typing import List
 
-from pyfluids.cpu.kernel import LBMKernel, KernelType
-from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters
-
+from pyfluids import cpu
 from acousticscaling import OneDirectionalAcousticScaling
 
 
@@ -58,18 +90,18 @@ class OneDirectionalAcousticScalingTest(unittest.TestCase):
         self.assertEqual(self.grid_params.periodic_boundary_in_x2, actual_grid_params.periodic_boundary_in_x2)
         self.assertEqual(self.grid_params.periodic_boundary_in_x3, actual_grid_params.periodic_boundary_in_x3)
 
-    def assert_physical_params_scaled_by_factor(self, actual_params: PhysicalParameters, factor: int):
+    def assert_physical_params_scaled_by_factor(self, actual_params: cpu.parameters.PhysicalParameters, factor: int):
         self.assertEqual(self.physical_params.lattice_viscosity * factor, actual_params.lattice_viscosity)
         self.assertEqual(self.physical_params.bulk_viscosity_factor, actual_params.bulk_viscosity_factor)
 
-    def assert_runtime_params_scaled_by_factor(self, actual_params: RuntimeParameters, factor: int):
+    def assert_runtime_params_scaled_by_factor(self, actual_params: cpu.parameters.RuntimeParameters, factor: int):
         self.assertEqual(self.runtime_params.number_of_timesteps * factor, actual_params.number_of_timesteps)
         self.assertEqual(self.runtime_params.number_of_threads, actual_params.number_of_threads)
         self.assertEqual(self.runtime_params.timestep_log_interval, actual_params.timestep_log_interval)
 
-    def assert_kernel_forcing_scaled_by_factor(self, actual_kernel: LBMKernel, factor: int):
+    def assert_kernel_forcing_scaled_by_factor(self, actual_kernel: cpu.kernel.LBMKernel, factor: int):
         self.assertEqual(self.kernel.type, actual_kernel.type)
         self.assertEqual(self.kernel.use_forcing, actual_kernel.use_forcing)
         self.assertAlmostEqual(self.kernel.forcing_in_x1 / factor, actual_kernel.forcing_in_x1)
         self.assertAlmostEqual(self.kernel.forcing_in_x2, actual_kernel.forcing_in_x2)
         self.assertAlmostEqual(self.kernel.forcing_in_x3, actual_kernel.forcing_in_x3)
@@ -80,14 +112,14 @@ class OneDirectionalAcousticScalingTest(unittest.TestCase):
 
     @staticmethod
     def make_kernel():
-        kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+        kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity)
         kernel.use_forcing = True
         kernel.forcing_in_x1 = 5e-10
         return kernel
 
     @staticmethod
     def make_runtime_params():
-        runtime_params = RuntimeParameters()
+        runtime_params = cpu.parameters.RuntimeParameters()
         runtime_params.number_of_threads = 4
         runtime_params.number_of_timesteps = 4_000_000
         runtime_params.timestep_log_interval = 1_000_000
@@ -95,13 +127,13 @@ class OneDirectionalAcousticScalingTest(unittest.TestCase):
 
     @staticmethod
     def make_physical_params():
-        physical_params = PhysicalParameters()
+        physical_params = cpu.parameters.PhysicalParameters()
         physical_params.lattice_viscosity = 1e-4
         return physical_params
 
     @staticmethod
     def make_grid_params():
-        grid_params = GridParameters()
+        grid_params = cpu.parameters.GridParameters()
         grid_params.node_distance = 1
         grid_params.number_of_nodes_per_direction = [1, 1, 16]
         grid_params.blocks_per_direction = [1, 1, 16]
diff --git a/Python/tests/test_boundaryconditions.py b/Python/tests/test_boundaryconditions.py
index e004ddfa21c78ea3d63a89f5dbc3bd7438a18ff1..d914c50cad2051188331b2efe604907091fa731e 100644
--- a/Python/tests/test_boundaryconditions.py
+++ b/Python/tests/test_boundaryconditions.py
@@ -1,5 +1,39 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file test_boundaryconditions.py
+! \ingroup tests
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 import unittest
-from pyfluids.cpu.boundaryconditions import *
+from pyfluids import cpu
 
 
 class BoundaryConditionsTest(unittest.TestCase):
@@ -8,13 +42,13 @@ class BoundaryConditionsTest(unittest.TestCase):
         """
         Should be able to create NoSlipBoundaryCondition
         """
-        sut = NoSlipBoundaryCondition()
+        sut = cpu.boundaryconditions.NoSlipBoundaryCondition()
 
     def test__can_create_velocity_bc(self):
         """
         Should be able to create VelocityBoundaryCondition
         """
-        sut = VelocityBoundaryCondition()
+        sut = cpu.boundaryconditions.VelocityBoundaryCondition()
 
     def test__can_create_velocity_bc_with_directions_function_and_time(self):
         """
@@ -24,7 +58,7 @@ class BoundaryConditionsTest(unittest.TestCase):
 
         parser = Parser()
         parser.expression = "1"
-        sut = VelocityBoundaryCondition(True, True, True, parser, 0, 1)
+        sut = cpu.boundaryconditions.VelocityBoundaryCondition(True, True, True, parser, 0, 1)
 
     def test__can_create_velocity_bc_with_directions__function_per_direction__and__time(self):
         """
@@ -40,7 +74,7 @@ class BoundaryConditionsTest(unittest.TestCase):
 
         f3 = Parser()
         f3.expression = "1"
-        sut = VelocityBoundaryCondition(True, True, True, f1, f2, f3, 0, 1)
+        sut = cpu.boundaryconditions.VelocityBoundaryCondition(True, True, True, f1, f2, f3, 0, 1)
 
     def test__can_create_velocity_bc_with_speeds_and_times_per_direction(self):
         """
@@ -51,11 +85,11 @@ class BoundaryConditionsTest(unittest.TestCase):
         start2, end2 = 1, 2
         start3, end3 = 2, 3
 
-        sut = VelocityBoundaryCondition(vx1, start1, end1, vx2, start2, end2, vx3, start3, end3)
+        sut = cpu.boundaryconditions.VelocityBoundaryCondition(vx1, start1, end1, vx2, start2, end2, vx3, start3, end3)
 
     def test__can_create_non_reflecting_outflow(self):
         """
         Should be able to create NonReflectingOutflow
         """
 
-        sut = NonReflectingOutflow()
+        sut = cpu.boundaryconditions.NonReflectingOutflow()
diff --git a/Python/tests/test_geometry.py b/Python/tests/test_geometry.py
index 5bb89eb245b6055653b78fde381da050d402b0cc..3d297f5c176cd99f7969adf37333588d86b77627 100644
--- a/Python/tests/test_geometry.py
+++ b/Python/tests/test_geometry.py
@@ -1,6 +1,40 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file test_geometry.py
+! \ingroup tests
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 import unittest
 
-from pyfluids.cpu.geometry import *
+from pyfluids import cpu
 
 
 class TestGeometry(unittest.TestCase):
@@ -9,7 +43,7 @@ class TestGeometry(unittest.TestCase):
         """
         WHEN setting point coordinates in constructor THEN point should have coordinates
         """
-        sut = GbPoint3D(4, 8, 3)
+        sut = cpu.geometry.GbPoint3D(4, 8, 3)
 
         self.assertEqual(sut.x1, 4)
         self.assertEqual(sut.x2, 8)
@@ -19,7 +53,7 @@ class TestGeometry(unittest.TestCase):
         """
         WHEN setting point coordinates THEN point should have coordinates
         """
-        sut = GbPoint3D()
+        sut = cpu.geometry.GbPoint3D()
 
         sut.x1 = 4
         sut.x2 = 8
@@ -33,10 +67,10 @@ class TestGeometry(unittest.TestCase):
         """
         WHEN setting line points THEN line should have points
         """
-        sut = GbLine3D()
+        sut = cpu.geometry.GbLine3D()
 
-        point1 = GbPoint3D()
-        point2 = GbPoint3D()
+        point1 = cpu.geometry.GbPoint3D()
+        point2 = cpu.geometry.GbPoint3D()
         sut.point1 = point1
         sut.point2 = point2
 
diff --git a/Python/tests/test_kernel.py b/Python/tests/test_kernel.py
index 8f58a1c869f9e292856268d43245a75f1dcfe213..e0159bec6802cb08d73214038b177091879fee46 100644
--- a/Python/tests/test_kernel.py
+++ b/Python/tests/test_kernel.py
@@ -1,12 +1,46 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file test_kernel.py
+! \ingroup tests
+! \author Sven Marcus, Henry Korb
+=======================================================================================
+"""
 import unittest
 
-from pyfluids.cpu.kernel import LBMKernel, KernelType
+from pyfluids import cpu
 
 
 class TestLBMKernel(unittest.TestCase):
 
     def setUp(self) -> None:
-        self.sut = LBMKernel(KernelType.BGK)
+        self.sut = cpu.kernel.LBMKernel(cpu.kernel.KernelType.BGK)
 
     def test_lbm_kernel__when_use_forcing_set_to_true__use_forcing_should_be_true(self) -> None:
         """
@@ -57,4 +91,4 @@ class TestLBMKernel(unittest.TestCase):
         """
 
         actual = self.sut.type
-        self.assertEqual(KernelType.BGK, actual)
+        self.assertEqual(cpu.kernel.KernelType.BGK, actual)
diff --git a/apps/cpu/ConvectionOfVortex/CMakeLists.txt b/apps/cpu/ConvectionOfVortex/CMakeLists.txt
index de3034c04bb2f2f16edd9b4bf48db81c83d15b3e..33d60676c7e0dfdde411c3c5b92a2534ea54fbfe 100644
--- a/apps/cpu/ConvectionOfVortex/CMakeLists.txt
+++ b/apps/cpu/ConvectionOfVortex/CMakeLists.txt
@@ -1,3 +1,6 @@
+########################################################
+## C++ PROJECT                                       ###
+########################################################
 PROJECT(ConvectionOfVortex)
 
 vf_add_library(BUILDTYPE binary PRIVATE_LINK VirtualFluidsCore basics ${MPI_CXX_LIBRARIES} FILES cov.cpp )
diff --git a/apps/cpu/ConvectionOfVortex/cov.cpp b/apps/cpu/ConvectionOfVortex/cov.cpp
index 627f5d03abe32f43cf3eb33649e0f209595b8b6a..45b9489397df760be5d1247f1f2961393b2c22fe 100644
--- a/apps/cpu/ConvectionOfVortex/cov.cpp
+++ b/apps/cpu/ConvectionOfVortex/cov.cpp
@@ -8,13 +8,15 @@ using namespace std;
 
 void run()
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
-      double availMem = 5e9;
+      real availMem = 5e9;
 
       
 
@@ -24,11 +26,11 @@ void run()
 
       string  pathname = "d:/temp/ConvectionOfVortex_0.003_4th";
       int     endTime = 10000;
-      double  outTime = 10;
-      LBMReal dx =  0.003;
-      LBMReal rhoLB = 0.0;
-      LBMReal nuLB = 8.66025e-6;
-      double yFactor = 1.0;
+      real  outTime = 10;
+      real dx =  0.003;
+      real rhoLB = 0.0;
+      real nuLB = 8.66025e-6;
+      real yFactor = 1.0;
 
       //string  pathname = "d:/temp/ConvectionOfVortex_0.003_square";
       //int     endTime = 20;
@@ -79,13 +81,13 @@ void run()
       int refineLevel = 1;
 
       //bounding box
-      double g_minX1 = -0.045;
-      double g_minX2 = -0.015/yFactor;
-      double g_minX3 = -0.06;
+      real g_minX1 = -0.045;
+      real g_minX2 = -0.015/yFactor;
+      real g_minX3 = -0.06;
 
-      double g_maxX1 = 0.045;
-      double g_maxX2 = 0.015/yFactor;
-      double g_maxX3 = 0.06;
+      real g_maxX1 = 0.045;
+      real g_maxX2 = 0.015/yFactor;
+      real g_maxX3 = 0.06;
 
       vector<int>  blocknx(3);
       blocknx[0] = 10;
@@ -97,7 +99,7 @@ void run()
       if (myid == 0) GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube", WbWriterVtkXmlBinary::getInstance());
 
 
-      double blockLength = blocknx[0] * dx;
+      real blockLength = blocknx[0] * dx;
 
       SPtr<Grid3D> grid(new Grid3D(comm));
       grid->setDeltaX(dx);
@@ -150,7 +152,7 @@ void run()
       if (myid==0) GbSystem3D::writeGeoObject(geoOutflow4.get(), pathname+"/geo/geoOutflow4", WbWriterVtkXmlASCII::getInstance());
       SPtr<D3Q27Interactor> outflowIntr4 = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflow4, grid, outflowBCAdapter, Interactor3D::SOLID));
 
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
       InteractorsHelper intHelper(grid, metisVisitor);
       //intHelper.addInteractor(outflowIntr1);
       //intHelper.addInteractor(outflowIntr2);
@@ -183,8 +185,8 @@ void run()
       unsigned long long numberOfNodesPerBlock = (unsigned long long)(blocknx[0])* (unsigned long long)(blocknx[1])* (unsigned long long)(blocknx[2]);
       unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
       unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-      double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-      double needMem = needMemAll / double(comm->getNumberOfProcesses());
+      real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+      real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
       if (myid == 0)
       {
@@ -224,7 +226,7 @@ void run()
 
       intHelper.setBC();
 
-      double Ma = 0.005;
+      real Ma = 0.005;
 
       mu::Parser initRho, initVx1, initVx2; 
       initRho.SetExpr("rhoLB + (-(rho0*epsilon^2)/2) * exp(1-(scaleFactor*(x1^2+x3^2))/R^2) + (1/(2*gamma*rho0)) * ((-(rho0*epsilon^2)/2) * exp(1-(scaleFactor*(x1^2+x3^2))/R^2))^2");
diff --git a/apps/cpu/CouetteFlow/cflow.cpp b/apps/cpu/CouetteFlow/cflow.cpp
index 3de4a3b36f7453eaafca24648d0aa770fb954d63..a60031096a0197e129a6c01c1dd9d5881dc2699f 100644
--- a/apps/cpu/CouetteFlow/cflow.cpp
+++ b/apps/cpu/CouetteFlow/cflow.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -16,24 +18,24 @@ void bflow(string configname)
       string          pathname = config.getValue<string>("pathname");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
       //double          nuLB = config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       //int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
       //double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
+      real          deltax = config.getValue<real>("deltax");
       //double          cpStep = config.getValue<double>("cpStep");
       //double          cpStepStart = config.getValue<double>("cpStepStart");
       //bool            newStart = config.getValue<bool>("newStart");
-      double          forcing = config.getValue<double>("forcing");
+      real          forcing = config.getValue<real>("forcing");
       //double          n = config.getValue<double>("n");
       //double          k = config.getValue<double>("k");
       //double          tau0 = config.getValue<double>("tau0");
-      double          velocity = config.getValue<double>("velocity");
-      double          n = config.getValue<double>("n");
+      real          velocity = config.getValue<real>("velocity");
+      real          n = config.getValue<real>("n");
 //      double          Re = config.getValue<double>("Re");
 //      double          Bn = config.getValue<double>("Bn");
 
@@ -58,7 +60,7 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -71,15 +73,15 @@ void bflow(string configname)
       //double g_maxX2 = boundingBox[1];
       //double g_maxX3 = boundingBox[2];
 
-      double g_minX1 = 0.0;
-      double g_minX2 = -boundingBox[1]/2.0;
-      double g_minX3 = -boundingBox[2]/2.0;
+      real g_minX1 = 0.0;
+      real g_minX2 = -boundingBox[1]/2.0;
+      real g_minX3 = -boundingBox[2]/2.0;
 
-      double g_maxX1 = boundingBox[0];
-      double g_maxX2 = boundingBox[1]/2.0;
-      double g_maxX3 = boundingBox[2]/2.0;
+      real g_maxX1 = boundingBox[0];
+      real g_maxX2 = boundingBox[1]/2.0;
+      real g_maxX3 = boundingBox[2]/2.0;
 
-      double blockLength = 3.0 * deltax;
+      real blockLength = 3.0 * deltax;
 
 //      double h = (g_maxX2) / 2.0;
 //      double dex = g_maxX1;
@@ -89,16 +91,16 @@ void bflow(string configname)
       //LBMReal n = 0.4;
 
 
-      double d = boundingBox[1];
-      double U = velocity;
-      double Gamma = U / d;
+      real d = boundingBox[1];
+      real U = velocity;
+      real Gamma = U / d;
 
-      double k = 0.05; // (U * d) / (Re * std::pow(Gamma, n - 1));
-      double tau0 = 1e-6;// Bn* k* std::pow(Gamma, n);
+      real k = 0.05; // (U * d) / (Re * std::pow(Gamma, n - 1));
+      real tau0 = 1e-6;// Bn* k* std::pow(Gamma, n);
 
-      double beta = 14;
-      double c = 10; // 1.0 / 6.0;
-      double mu0 = 1e-4;
+      real beta = 14;
+      real c = 10; // 1.0 / 6.0;
+      real mu0 = 1e-4;
 
       SPtr<Rheology> thix = Rheology::getInstance();
       //Herschel-Bulkley
@@ -184,7 +186,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
       ////////////////////////////////////////////
       /////delete solid blocks
       if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -205,8 +207,8 @@ void bflow(string configname)
       unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-      double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-      double needMem = needMemAll / double(comm->getNumberOfProcesses());
+      real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+      real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
       if (myid == 0)
       {
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp b/apps/cpu/FlowAroundCylinder/cylinder.cpp
index 5578ecb56b37e3b489e4c60d9a26adfa05e9b3d3..d66222495986cc6eaa26c078d7eaf225834ffaab 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp
@@ -9,6 +9,8 @@ using namespace std;
 //////////////////////////////////////////////////////////////////////////
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       //DEBUG///////////////////////////////////////
@@ -18,20 +20,20 @@ void run(string configname)
       config.load(configname);
 
       string          pathOut = config.getValue<string>("pathOut");
-      double          uLB = config.getValue<double>("uLB");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          cpStart = config.getValue<double>("cpStart");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          uLB = config.getValue<real>("uLB");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          cpStart = config.getValue<real>("cpStart");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      vector<double>  nupsStep = config.getVector<double>("nupsStep");
+      vector<real>  nupsStep = config.getVector<real>("nupsStep");
       bool            newStart = config.getValue<bool>("newStart");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blockNx = config.getVector<int>("blockNx");
-      double          dx = config.getValue<double>("dx");
+      real          dx = config.getValue<real>("dx");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
@@ -58,18 +60,18 @@ void run(string configname)
 
       
 
-      double L1 = 2.5;
-      double L2, L3, H;
+      real L1 = 2.5;
+      real L2, L3, H;
       L2 = L3 = H = 0.41;
 
-      LBMReal Re = 20.0;
-      LBMReal radius = 0.05;
-      LBMReal rhoReal = 1.0; //kg/m^3
-      LBMReal uReal = 0.45;//m/s
-      LBMReal nueReal = (uReal*radius*2.0)/Re;
+      real Re = 20.0;
+      real radius = 0.05;
+      real rhoReal = 1.0; //kg/m^3
+      real uReal = 0.45;//m/s
+      real nueReal = (uReal*radius*2.0)/Re;
       
-      LBMReal rhoLB = 0.0;
-      LBMReal nueLB = (((4.0/9.0)*uLB)*2.0*(radius/dx))/Re;
+      real rhoLB = 0.0;
+      real nueLB = (((4.0/9.0)*uLB)*2.0*(radius/dx))/Re;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -135,13 +137,13 @@ void run(string configname)
          GbSystem3D::writeGeoObject(refCylinder.get(), pathOut+"/geo/refCylinder", WbWriterVtkXmlBinary::getInstance());
 
          //bounding box
-         double g_minX1 = 0.0;
-         double g_minX2 = 0.0;
-         double g_minX3 = 0.0;
+         real g_minX1 = 0.0;
+         real g_minX2 = 0.0;
+         real g_minX3 = 0.0;
 
-         double g_maxX1 = L1;
-         double g_maxX2 = L2;
-         double g_maxX3 = L3;
+         real g_maxX1 = L1;
+         real g_maxX2 = L2;
+         real g_maxX3 = L3;
 
          SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
          if (myid==0) GbSystem3D::writeGeoObject(gridCube.get(), pathOut+"/geo/gridCube", WbWriterVtkXmlBinary::getInstance());
@@ -150,7 +152,7 @@ void run(string configname)
          const int blocknx2 = blockNx[1];
          const int blocknx3 = blockNx[2];
 
-         double blockLength = blocknx1*dx;
+         real blockLength = blocknx1*dx;
 
          grid->setDeltaX(dx);
          grid->setBlockNX(blocknx1, blocknx2, blocknx3);
@@ -203,7 +205,7 @@ void run(string configname)
          SPtr<D3Q27Interactor> outflowInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflow, grid, denBCAdapter, Interactor3D::SOLID));
 
          
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
          InteractorsHelper intHelper(grid, metisVisitor);
          intHelper.addInteractor(cylinderInt);
          intHelper.addInteractor(addWallYminInt);
@@ -223,8 +225,8 @@ void run(string configname)
          unsigned long long numberOfNodesPerBlock = (unsigned long long)(blockNx[0])* (unsigned long long)(blockNx[1])* (unsigned long long)(blockNx[2]);
          unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
          unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blockNx[0]+ghostLayer) * (blockNx[1]+ghostLayer) * (blockNx[2]+ghostLayer);
-         double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(double)+sizeof(int)+sizeof(float)*4));
-         double needMem = needMemAll/double(comm->getNumberOfProcesses());
+         real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(real)+sizeof(int)+sizeof(float)*4));
+         real needMem = needMemAll/real(comm->getNumberOfProcesses());
 
          if (myid==0)
          {
@@ -302,8 +304,8 @@ void run(string configname)
 
 	  SPtr<CoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, stepSch, pathOut, WbWriterVtkXmlBinary::getInstance(), conv, comm));
 
-      double area = (2.0*radius*H)/(dx*dx);
-      double v    = 4.0*uLB/9.0;
+      real area = (2.0*radius*H)/(dx*dx);
+      real v    = 4.0*uLB/9.0;
       SPtr<UbScheduler> forceSch(new UbScheduler(100));
       SPtr<CalculateForcesCoProcessor> fp = make_shared<CalculateForcesCoProcessor>(grid, forceSch, pathOut + "/results/forces.txt", comm, v, area);
       fp->addInteractor(cylinderInt);
diff --git a/apps/cpu/HerschelBulkleyModel/hbflow.cpp b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
index b97942a1cd78c4ea9a5c73b4f24ddf4f6ae2edf6..567fd661cd2e131e3f4f311285bd636f471dccb6 100644
--- a/apps/cpu/HerschelBulkleyModel/hbflow.cpp
+++ b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -16,27 +18,27 @@ void bflow(string configname)
       string          pathname = config.getValue<string>("pathname");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
-      double          nuLB = config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
+      real          nuLB = config.getValue<real>("nuLB");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       //int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
       //double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
+      real          deltax = config.getValue<real>("deltax");
       //double          cpStep = config.getValue<double>("cpStep");
       //double          cpStepStart = config.getValue<double>("cpStepStart");
       //bool            newStart = config.getValue<bool>("newStart");
-      double          forcing = config.getValue<double>("forcing");
+      real          forcing = config.getValue<real>("forcing");
       //double          n = config.getValue<double>("n");
       //double          k = config.getValue<double>("k");
-      double          tau0 = config.getValue<double>("tau0");
-      double          velocity = config.getValue<double>("velocity");
-      double          n = config.getValue<double>("n");
+      real          tau0 = config.getValue<real>("tau0");
+      real          velocity = config.getValue<real>("velocity");
+      real          n = config.getValue<real>("n");
 //      double          Re = config.getValue<double>("Re");
 //      double          Bn = config.getValue<double>("Bn");
-      double          scaleFactor = config.getValue<double>("scaleFactor");
+      real          scaleFactor = config.getValue<real>("scaleFactor");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
@@ -59,7 +61,7 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -72,17 +74,17 @@ void bflow(string configname)
       //double g_maxX2 = boundingBox[1];
       //double g_maxX3 = boundingBox[2]+1.0;
 
-      double g_minX1 = 0.0;
-      double g_minX2 = -boundingBox[1]/2.0;
-      double g_minX3 = -boundingBox[2]/2.0;
+      real g_minX1 = 0.0;
+      real g_minX2 = -boundingBox[1]/2.0;
+      real g_minX3 = -boundingBox[2]/2.0;
 
-      double g_maxX1 = boundingBox[0];
-      double g_maxX2 = boundingBox[1]/2.0;
-      double g_maxX3 = boundingBox[2]/2.0;
+      real g_maxX1 = boundingBox[0];
+      real g_maxX2 = boundingBox[1]/2.0;
+      real g_maxX3 = boundingBox[2]/2.0;
 
       
 
-      double blockLength = 3.0 * deltax;
+      real blockLength = 3.0 * deltax;
 
 //      double h = (g_maxX2) / 2.0;
 //      double dex = g_maxX1;
@@ -92,9 +94,9 @@ void bflow(string configname)
       //LBMReal n = 0.4;
 
 
-      double d = boundingBox[1];
-      double U = velocity;
-      double Gamma = U / d;
+      real d = boundingBox[1];
+      real U = velocity;
+      real Gamma = U / d;
 
       //double scaleFactor = 2.0;
 
@@ -108,7 +110,7 @@ void bflow(string configname)
 
       // Acoustic Scaling
 
-      double k = nuLB * scaleFactor;
+      real k = nuLB * scaleFactor;
       //double tau0 = 3e-5; 
       forcing /= scaleFactor;
       endTime *= scaleFactor;
@@ -116,9 +118,9 @@ void bflow(string configname)
 
       //outTime = endTime;
 
-      double beta = 14;
-      double c = 10; // 1.0 / 6.0;
-      double mu0 = 1e-4;
+      real beta = 14;
+      real c = 10; // 1.0 / 6.0;
+      real mu0 = 1e-4;
 
       SPtr<Rheology> thix = Rheology::getInstance();
       //Herschel-Bulkley
@@ -218,7 +220,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
       ////////////////////////////////////////////
       /////delete solid blocks
       if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -240,8 +242,8 @@ void bflow(string configname)
       unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-      double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-      double needMem = needMemAll / double(comm->getNumberOfProcesses());
+      real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+      real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
       if (myid == 0)
       {
diff --git a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
index 67f5a00ad49dcbe16a018e402f85ed02b3848650..ae71a3a44926c52e04eb0df682b0495ce37c173d 100644
--- a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
+++ b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile config;
@@ -16,24 +18,24 @@ void bflow(string configname)
       string          outputPath = config.getValue<string>("outputPath");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
       //double          nuLB = config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
-      double          radius = config.getValue<double>("radius");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          cpStart = config.getValue<double>("cpStart");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          deltax = config.getValue<real>("deltax");
+      real          radius = config.getValue<real>("radius");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          cpStart = config.getValue<real>("cpStart");
       bool            newStart = config.getValue<bool>("newStart");
-      double          velocity = config.getValue<double>("velocity");
-      double          n = config.getValue<double>("n");
-      double          Re = config.getValue<double>("Re");
-      double          Bn = config.getValue<double>("Bn");
-      vector<double>  sphereCenter = config.getVector<double>("sphereCenter");
+      real          velocity = config.getValue<real>("velocity");
+      real          n = config.getValue<real>("n");
+      real          Re = config.getValue<real>("Re");
+      real          Bn = config.getValue<real>("Bn");
+      vector<real>  sphereCenter = config.getVector<real>("sphereCenter");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
@@ -56,19 +58,19 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
       //bounding box
 
-      double g_minX1 = 0;
-      double g_minX2 = 0;
-      double g_minX3 = 0;
+      real g_minX1 = 0;
+      real g_minX2 = 0;
+      real g_minX3 = 0;
 
-      double g_maxX1 = boundingBox[0];
-      double g_maxX2 = boundingBox[1];
-      double g_maxX3 = boundingBox[2];
+      real g_maxX1 = boundingBox[0];
+      real g_maxX2 = boundingBox[1];
+      real g_maxX3 = boundingBox[2];
 
       //double g_minX1 = -boundingBox[0]/2.0;
       //double g_minX2 = -boundingBox[1] / 2.0;
@@ -78,21 +80,21 @@ void bflow(string configname)
       //double g_maxX2 = boundingBox[1]/2.0;
       //double g_maxX3 = boundingBox[2]/2.0;
 
-      double blockLength = 3.0 * deltax;
+      real blockLength = 3.0 * deltax;
 
-      double d = 2.0 * radius;
-      double U = velocity;
-      double Gamma = U / d;
+      real d = 2.0 * radius;
+      real U = velocity;
+      real Gamma = U / d;
 
-      double k = (U * d) / (Re * std::pow(Gamma, n - 1));
-      double tau0 = Bn * k * std::pow(Gamma, n);
+      real k = (U * d) / (Re * std::pow(Gamma, n - 1));
+      real tau0 = Bn * k * std::pow(Gamma, n);
 
       //double k = 0.05; // (U * d) / (Re * std::pow(Gamma, n - 1));
       //double tau0 = 3e-6; //Bn * k * std::pow(Gamma, n);
 
       //double forcing = 8e-7;
 
-      double omegaMin = 1.0e-8;
+      real omegaMin = 1.0e-8;
 
       SPtr<Rheology> thix = Rheology::getInstance();
       thix->setPowerIndex(n);
@@ -161,7 +163,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
       ////////////////////////////////////////////
       //////////////////////////////////////////////////////////////////////////
       //restart
@@ -243,7 +245,7 @@ void bflow(string configname)
 
          ////////////////////////////////////////////
          //METIS
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
          ////////////////////////////////////////////
          /////delete solid blocks
          if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -267,8 +269,8 @@ void bflow(string configname)
          unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-         double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
@@ -341,7 +343,7 @@ void bflow(string configname)
       SPtr<WriteMacroscopicQuantitiesCoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, outputPath, WbWriterVtkXmlBinary::getInstance(), SPtr<LBMUnitConverter>(new LBMUnitConverter()), comm));
       //writeMQCoProcessor->process(0);
 
-      double area = UbMath::PI*radius*radius;
+      real area = UbMath::PI*radius*radius;
       SPtr<UbScheduler> forceSch(new UbScheduler(100));
       SPtr<CalculateForcesCoProcessor> fp = make_shared<CalculateForcesCoProcessor>(grid, forceSch, outputPath + "/forces/forces.txt", comm, velocity, area);
       fp->addInteractor(sphereInt);
diff --git a/apps/cpu/JetBreakup/JetBreakup.cpp b/apps/cpu/JetBreakup/JetBreakup.cpp
index 01d4cc3eb5b7d46118d40bc5fbb98b16e57d82eb..2115b515f1c77cd97b587449ab9881642aa6e1be 100644
--- a/apps/cpu/JetBreakup/JetBreakup.cpp
+++ b/apps/cpu/JetBreakup/JetBreakup.cpp
@@ -6,13 +6,15 @@
 
 using namespace std;
 
-void setInflowBC(double x1, double x2, double x3, double radius, int dir)
+void setInflowBC(real x1, real x2, real x3, real radius, int dir)
 {
 
 }
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
 
         // Sleep(30000);
@@ -27,7 +29,7 @@ void run(string configname)
         vector<int> blocknx = config.getVector<int>("blocknx");
         //vector<double> boundingBox = config.getVector<double>("boundingBox");
         // vector<double>  length = config.getVector<double>("length");
-        double U_LB = config.getValue<double>("U_LB");
+        real U_LB = config.getValue<real>("U_LB");
         // double uF2                         = config.getValue<double>("uF2");
         //double nuL = config.getValue<double>("nuL");
         //double nuG = config.getValue<double>("nuG");
@@ -35,23 +37,23 @@ void run(string configname)
         //double sigma = config.getValue<double>("sigma");
         int interfaceWidth = config.getValue<int>("interfaceWidth");
         //double D          = config.getValue<double>("D");
-        double theta = config.getValue<double>("contactAngle");
-        double D_LB = config.getValue<double>("D_LB");
-        double phiL = config.getValue<double>("phi_L");
-        double phiH = config.getValue<double>("phi_H");
-        double tauH = config.getValue<double>("Phase-field Relaxation");
-        double mob = config.getValue<double>("Mobility");
-
-        double endTime = config.getValue<double>("endTime");
-        double outTime = config.getValue<double>("outTime");
-        double availMem = config.getValue<double>("availMem");
+        real theta = config.getValue<real>("contactAngle");
+        real D_LB = config.getValue<real>("D_LB");
+        real phiL = config.getValue<real>("phi_L");
+        real phiH = config.getValue<real>("phi_H");
+        real tauH = config.getValue<real>("Phase-field Relaxation");
+        real mob = config.getValue<real>("Mobility");
+
+        real endTime = config.getValue<real>("endTime");
+        real outTime = config.getValue<real>("outTime");
+        real availMem = config.getValue<real>("availMem");
         //int refineLevel = config.getValue<int>("refineLevel");
         //double Re = config.getValue<double>("Re");
         
         bool logToFile = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart = config.getValue<double>("cpStart");
-        double cpStep = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart = config.getValue<real>("cpStart");
+        real cpStep = config.getValue<real>("cpStep");
         bool newStart = config.getValue<bool>("newStart");
 
 
@@ -81,7 +83,7 @@ void run(string configname)
 
         // Sleep(30000);
 
-        double rho_h=0, rho_l=0, r_rho=0, mu_h=0, /*mu_l,*/ Uo=0, D=0, sigma=0;
+        real rho_h=0, rho_l=0, r_rho=0, mu_h=0, /*mu_l,*/ Uo=0, D=0, sigma=0;
 
         switch (caseN) {
             case 1: 
@@ -140,23 +142,23 @@ void run(string configname)
                 break;                
         }
 
-        double Re = rho_h * Uo * D / mu_h;
-        double We = rho_h * Uo * Uo * D / sigma;
+        real Re = rho_h * Uo * D / mu_h;
+        real We = rho_h * Uo * Uo * D / sigma;
 
-        double dx = D / D_LB;
-        double nu_h = U_LB * D_LB / Re;
-        double nu_l = nu_h;
+        real dx = D / D_LB;
+        real nu_h = U_LB * D_LB / Re;
+        real nu_l = nu_h;
 
-        double rho_h_LB = 1;
+        real rho_h_LB = 1;
         //surface tension
-        double sigma_LB = rho_h_LB * U_LB * U_LB * D_LB / We;
+        real sigma_LB = rho_h_LB * U_LB * U_LB * D_LB / We;
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
+        real rhoLB = 0.0;
         //LBMReal nuLB = nu_l; //(uLB*dLB) / Re;
 
-        double beta = 12.0 * sigma_LB / interfaceWidth;
-        double kappa = 1.5 * interfaceWidth * sigma_LB;
+        real beta = 12.0 * sigma_LB / interfaceWidth;
+        real kappa = 1.5 * interfaceWidth * sigma_LB;
 
         if (myid == 0) {
             UBLOG(logINFO, "Parameters:");
@@ -219,7 +221,7 @@ void run(string configname)
         grid->setGhostLayerWidth(2);
 
         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(
-            comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+            comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -251,7 +253,7 @@ void run(string configname)
         fctF2.SetExpr("vy1");
         fctF2.DefineConst("vy1", U_LB);
 
-        double startTime = 1;
+        real startTime = 1;
         SPtr<BCAdapter> velBCAdapterF1(
             new MultiphaseVelocityBCAdapter(true, false, false, fctF1, phiH, 0.0, startTime));
         SPtr<BCAdapter> velBCAdapterF2(
@@ -293,17 +295,17 @@ void run(string configname)
             //  if (newStart) {
 
             // bounding box
-            double g_minX1 = 0;
-            double g_minX2 = 0;
-            double g_minX3 = 0;
+            real g_minX1 = 0;
+            real g_minX2 = 0;
+            real g_minX3 = 0;
 
             //double g_maxX1 = 8.0*D;
             //double g_maxX2 = 2.5*D;
             //double g_maxX3 = 2.5*D;
 
-             double g_maxX1 = 1.0 * D; // 8.0 * D;
-             double g_maxX2 = 2.0 * D;
-             double g_maxX3 = 2.0 * D;
+             real g_maxX1 = 1.0 * D; // 8.0 * D;
+             real g_maxX2 = 2.0 * D;
+             real g_maxX3 = 2.0 * D;
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -452,9 +454,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
@@ -486,9 +488,9 @@ void run(string configname)
             //mu::Parser fct1;
             //fct1.SetExpr("phiL");
             //fct1.DefineConst("phiL", phiL);
-            LBMReal x1c = 0;  // (g_maxX1 - g_minX1-1)/2; //
-            LBMReal x2c = (g_maxX2 - g_minX2)/2;
-            LBMReal x3c = (g_maxX3 - g_minX3)/2;
+            real x1c = 0;  // (g_maxX1 - g_minX1-1)/2; //
+            real x2c = (g_maxX2 - g_minX2)/2;
+            real x3c = (g_maxX3 - g_minX3)/2;
             
             mu::Parser fct1;
             fct1.SetExpr("0.5-0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
@@ -574,7 +576,7 @@ void run(string configname)
         grid->accept(setConnsVisitor);
 
         SPtr<UbScheduler> visSch(new UbScheduler(outTime));
-        double t_ast, t;
+        real t_ast, t;
         t_ast = 7.19;
         t = (int)(t_ast/(U_LB/(D_LB)));
         visSch->addSchedule(t,t,t); //t=7.19
diff --git a/apps/cpu/LaminarTubeFlow/ltf.cpp b/apps/cpu/LaminarTubeFlow/ltf.cpp
index 93fd31083a1da92bc5fb73bb0606c7a8121bb5b8..4fb96833162c896eda7229cef913c21d3a6b7c78 100644
--- a/apps/cpu/LaminarTubeFlow/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlow/ltf.cpp
@@ -9,6 +9,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -17,18 +19,18 @@ void run(string configname)
       string          pathname = config.getValue<string>("pathname");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      double          uLB = config.getValue<double>("uLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          uLB = config.getValue<real>("uLB");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
-      double          Re = config.getValue<double>("Re");
-      double          dx = config.getValue<double>("dx");
-      vector<double>  length = config.getVector<double>("length");
+      real          Re = config.getValue<real>("Re");
+      real          dx = config.getValue<real>("dx");
+      vector<real>  length = config.getVector<real>("length");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          cpStart = config.getValue<double>("cpStart");
-      double          cpStep = config.getValue<double>("cpStep");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          cpStart = config.getValue<real>("cpStart");
+      real          cpStep = config.getValue<real>("cpStep");
       bool            newStart = config.getValue<bool>("newStart");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
@@ -56,9 +58,9 @@ void run(string configname)
 
       //Sleep(30000);
 
-      LBMReal dLB = length[1] / dx;
-      LBMReal rhoLB = 0.0;
-      LBMReal nuLB = (uLB*dLB) / Re;
+      real dLB = length[1] / dx;
+      real rhoLB = 0.0;
+      real nuLB = (uLB*dLB) / Re;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -108,7 +110,7 @@ void run(string configname)
       kernel->setBCProcessor(bcProc);
 
       //////////////////////////////////////////////////////////////////////////
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
       //restart
       SPtr<UbScheduler> mSch(new UbScheduler(cpStep, cpStart));
       //SPtr<MPIIOMigrationCoProcessor> migCoProcessor(new MPIIOMigrationCoProcessor(grid, mSch, metisVisitor, pathname + "/mig", comm));
@@ -126,13 +128,13 @@ void run(string configname)
       {
 
          //bounding box
-         double g_minX1 = 0.0;
-         double g_minX2 = -length[1] / 2.0;
-         double g_minX3 = -length[2] / 2.0;
+         real g_minX1 = 0.0;
+         real g_minX2 = -length[1] / 2.0;
+         real g_minX3 = -length[2] / 2.0;
 
-         double g_maxX1 = length[0];
-         double g_maxX2 = length[1] / 2.0;
-         double g_maxX3 = length[2] / 2.0;
+         real g_maxX1 = length[0];
+         real g_maxX2 = length[1] / 2.0;
+         real g_maxX3 = length[2] / 2.0;
 
          //geometry
          //x
@@ -145,7 +147,7 @@ void run(string configname)
          if (myid == 0) GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube", WbWriterVtkXmlBinary::getInstance());
 
 
-         double blockLength = blocknx[0] * dx;
+         real blockLength = blocknx[0] * dx;
 
 
 
@@ -235,8 +237,8 @@ void run(string configname)
          unsigned long long numberOfNodesPerBlock = (unsigned long long)(blocknx[0])* (unsigned long long)(blocknx[1])* (unsigned long long)(blocknx[2]);
          unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
          unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-         double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
diff --git a/apps/cpu/Multiphase/Multiphase.cpp b/apps/cpu/Multiphase/Multiphase.cpp
index 09d74e1473e9fef8e7f29343d758359eaf0752a2..4d4bc9cf82afe16309f22b69c0973acb3f96324c 100644
--- a/apps/cpu/Multiphase/Multiphase.cpp
+++ b/apps/cpu/Multiphase/Multiphase.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
 
         //Sleep(30000);
@@ -20,37 +22,37 @@ void run(string configname)
         string geoFile             = config.getValue<string>("geoFile");
         int numOfThreads           = config.getValue<int>("numOfThreads");
         vector<int> blocknx        = config.getVector<int>("blocknx");
-        vector<double> boundingBox = config.getVector<double>("boundingBox");
+        vector<real> boundingBox = config.getVector<real>("boundingBox");
         // vector<double>  length = config.getVector<double>("length");
-        double uLB = config.getValue<double>("uLB");
+        real uLB = config.getValue<real>("uLB");
         // double uF2                         = config.getValue<double>("uF2");
-        double nuL             = config.getValue<double>("nuL");
-        double nuG             = config.getValue<double>("nuG");
-        double densityRatio    = config.getValue<double>("densityRatio");
-        double sigma           = config.getValue<double>("sigma");
+        real nuL             = config.getValue<real>("nuL");
+        real nuG             = config.getValue<real>("nuG");
+        real densityRatio    = config.getValue<real>("densityRatio");
+        real sigma           = config.getValue<real>("sigma");
         int interfaceWidth = config.getValue<int>("interfaceWidth");
         //double radius          = config.getValue<double>("radius");
-        double theta           = config.getValue<double>("contactAngle");
-        double gr              = config.getValue<double>("gravity");
-        double phiL            = config.getValue<double>("phi_L");
-        double phiH            = config.getValue<double>("phi_H");
-        double tauH            = config.getValue<double>("Phase-field Relaxation");
-        double mob             = config.getValue<double>("Mobility");
-
-        double endTime     = config.getValue<double>("endTime");
-        double outTime     = config.getValue<double>("outTime");
-        double availMem    = config.getValue<double>("availMem");
+        real theta           = config.getValue<real>("contactAngle");
+        real gr              = config.getValue<real>("gravity");
+        real phiL            = config.getValue<real>("phi_L");
+        real phiH            = config.getValue<real>("phi_H");
+        real tauH            = config.getValue<real>("Phase-field Relaxation");
+        real mob             = config.getValue<real>("Mobility");
+
+        real endTime     = config.getValue<real>("endTime");
+        real outTime     = config.getValue<real>("outTime");
+        real availMem    = config.getValue<real>("availMem");
         int refineLevel    = config.getValue<int>("refineLevel");
-        double Re          = config.getValue<double>("Re");
-        double dx          = config.getValue<double>("dx");
+        real Re          = config.getValue<real>("Re");
+        real dx          = config.getValue<real>("dx");
         bool logToFile     = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart     = config.getValue<double>("cpStart");
-        double cpStep      = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart     = config.getValue<real>("cpStart");
+        real cpStep      = config.getValue<real>("cpStep");
         bool newStart      = config.getValue<bool>("newStart");
 
-        double beta = 12 * sigma / interfaceWidth;
-        double kappa = 1.5 * interfaceWidth * sigma;
+        real beta = 12 * sigma / interfaceWidth;
+        real kappa = 1.5 * interfaceWidth * sigma;
 
         SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
         int myid                = comm->getProcessID();
@@ -76,8 +78,8 @@ void run(string configname)
         // Sleep(30000);
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
-        LBMReal nuLB  = nuL; //(uLB*dLB) / Re;
+        real rhoLB = 0.0;
+        real nuLB  = nuL; //(uLB*dLB) / Re;
 
         SPtr<LBMUnitConverter> conv(new LBMUnitConverter());
 
@@ -122,7 +124,7 @@ void run(string configname)
         grid->setGhostLayerWidth(2);
 
        
-        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -154,7 +156,7 @@ void run(string configname)
         fctF2.SetExpr("vy1");
         fctF2.DefineConst("vy1", uLB);
 
-        double startTime = 30;
+        real startTime = 30;
         SPtr<BCAdapter> velBCAdapterF1(new MultiphaseVelocityBCAdapter(true, false, false, fctF1, phiH, 0.0, startTime));
         SPtr<BCAdapter> velBCAdapterF2(new MultiphaseVelocityBCAdapter(true, false, false, fctF2, phiH, startTime, endTime));
 
@@ -199,13 +201,13 @@ void run(string configname)
             double g_maxX2 = length[1] / 2.0;
             double g_maxX3 = length[2] / 2.0;*/
 
-            double g_minX1 = boundingBox[0];
-            double g_minX2 = boundingBox[2];
-            double g_minX3 = boundingBox[4];
+            real g_minX1 = boundingBox[0];
+            real g_minX2 = boundingBox[2];
+            real g_minX3 = boundingBox[4];
 
-            double g_maxX1 = boundingBox[1];
-            double g_maxX2 = boundingBox[3];
-            double g_maxX3 = boundingBox[5];
+            real g_maxX1 = boundingBox[1];
+            real g_maxX2 = boundingBox[3];
+            real g_maxX3 = boundingBox[5];
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -330,9 +332,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
diff --git a/apps/cpu/MultiphaseDropletTest/droplet.cpp b/apps/cpu/MultiphaseDropletTest/droplet.cpp
index 54b59fcfd8bd93f220b3d3d4ebb5bb29881079e5..a9d561930a77cc447bbe6c959bd7e8464f314d69 100644
--- a/apps/cpu/MultiphaseDropletTest/droplet.cpp
+++ b/apps/cpu/MultiphaseDropletTest/droplet.cpp
@@ -13,6 +13,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
         vf::basics::ConfigurationFile config;
         config.load(configname);
@@ -20,31 +22,31 @@ void run(string configname)
         string pathname            = config.getValue<string>("pathname");
         int numOfThreads           = config.getValue<int>("numOfThreads");
         vector<int> blocknx        = config.getVector<int>("blocknx");
-        vector<double> boundingBox = config.getVector<double>("boundingBox");
-        double uLB             = config.getValue<double>("uLB");
-        double nuL             = config.getValue<double>("nuL");
-        double nuG             = config.getValue<double>("nuG");
-        double densityRatio    = config.getValue<double>("densityRatio");
-        double sigma           = config.getValue<double>("sigma");
+        vector<real> boundingBox = config.getVector<real>("boundingBox");
+        real uLB             = config.getValue<real>("uLB");
+        real nuL             = config.getValue<real>("nuL");
+        real nuG             = config.getValue<real>("nuG");
+        real densityRatio    = config.getValue<real>("densityRatio");
+        real sigma           = config.getValue<real>("sigma");
         int interfaceThickness = config.getValue<int>("interfaceThickness");
-        double radius          = config.getValue<double>("radius");
-        double theta           = config.getValue<double>("contactAngle");
+        real radius          = config.getValue<real>("radius");
+        real theta           = config.getValue<real>("contactAngle");
         //double gr              = config.getValue<double>("gravity");
-        double phiL            = config.getValue<double>("phi_L");
-        double phiH            = config.getValue<double>("phi_H");
-        double tauH            = config.getValue<double>("Phase-field Relaxation");
-        double mob             = config.getValue<double>("Mobility");
-
-        double endTime     = config.getValue<double>("endTime");
-        double outTime     = config.getValue<double>("outTime");
-        double availMem    = config.getValue<double>("availMem");
+        real phiL            = config.getValue<real>("phi_L");
+        real phiH            = config.getValue<real>("phi_H");
+        real tauH            = config.getValue<real>("Phase-field Relaxation");
+        real mob             = config.getValue<real>("Mobility");
+
+        real endTime     = config.getValue<real>("endTime");
+        real outTime     = config.getValue<real>("outTime");
+        real availMem    = config.getValue<real>("availMem");
         int refineLevel    = config.getValue<int>("refineLevel");
-        double Re          = config.getValue<double>("Re");
-        double dx          = config.getValue<double>("dx");
+        real Re          = config.getValue<real>("Re");
+        real dx          = config.getValue<real>("dx");
         bool logToFile     = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart     = config.getValue<double>("cpStart");
-        double cpStep      = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart     = config.getValue<real>("cpStart");
+        real cpStep      = config.getValue<real>("cpStep");
         bool newStart      = config.getValue<bool>("newStart");
         //double rStep = config.getValue<double>("rStep");
 
@@ -88,37 +90,37 @@ void run(string configname)
         //Sleep(30000);
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
-        LBMReal nuLB  = nuL; //(uLB*dLB) / Re;
+        real rhoLB = 0.0;
+        real nuLB  = nuL; //(uLB*dLB) / Re;
 
         //diameter of circular droplet
-        LBMReal D  = 2.0*radius;
+        real D  = 2.0*radius;
 
         //density retio
-        LBMReal r_rho = densityRatio;
+        real r_rho = densityRatio;
 
         //density of heavy fluid
-        LBMReal rho_h = 1.0;
+        real rho_h = 1.0;
         //density of light fluid
-        LBMReal rho_l = rho_h / r_rho;
+        real rho_l = rho_h / r_rho;
 
         //kinimatic viscosity
-        LBMReal nu_h = nuL;
+        real nu_h = nuL;
         //LBMReal nu_l = nuG;
         //#dynamic viscosity
-        LBMReal mu_h = rho_h * nu_h;
+        real mu_h = rho_h * nu_h;
         
         //gravity
-        LBMReal g_y = Re* Re* mu_h* mu_h / (rho_h * (rho_h - rho_l) * D * D * D);
+        real g_y = Re* Re* mu_h* mu_h / (rho_h * (rho_h - rho_l) * D * D * D);
         //Eotvos number
-        LBMReal Eo = 100;
+        real Eo = 100;
         //surface tension
         sigma = rho_h* g_y* D* D / Eo;
 
         //g_y = 0;
 
-        double beta  = 12.0 * sigma / interfaceThickness;
-        double kappa = 1.5 * interfaceThickness * sigma;
+        real beta  = 12.0 * sigma / interfaceThickness;
+        real kappa = 1.5 * interfaceThickness * sigma;
 
         if (myid == 0) {
                 //UBLOG(logINFO, "uLb = " << uLB);
@@ -187,7 +189,7 @@ void run(string configname)
         grid->setPeriodicX3(true);
         grid->setGhostLayerWidth(2);
 
-        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -206,13 +208,13 @@ void run(string configname)
         if (newStart) {
 
             // bounding box
-            double g_minX1 = boundingBox[0];
-            double g_minX2 = boundingBox[2];
-            double g_minX3 = boundingBox[4];
+            real g_minX1 = boundingBox[0];
+            real g_minX2 = boundingBox[2];
+            real g_minX3 = boundingBox[4];
 
-            double g_maxX1 = boundingBox[1];
-            double g_maxX2 = boundingBox[3];
-            double g_maxX3 = boundingBox[5];
+            real g_maxX1 = boundingBox[1];
+            real g_maxX2 = boundingBox[3];
+            real g_maxX3 = boundingBox[5];
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -225,7 +227,7 @@ void run(string configname)
             GenBlocksGridVisitor genBlocks(gridCube);
             grid->accept(genBlocks);
 
-            double dx2 = 2.0 * dx;
+            real dx2 = 2.0 * dx;
             GbCuboid3DPtr wallYmin(new GbCuboid3D(g_minX1 - dx2, g_minX2 - dx2, g_minX3 - dx2, g_maxX1 + dx2, g_minX2, g_maxX3 + dx2));
             GbSystem3D::writeGeoObject(wallYmin.get(), pathname + "/geo/wallYmin", WbWriterVtkXmlASCII::getInstance());
             GbCuboid3DPtr wallYmax(new GbCuboid3D(g_minX1 - dx2, g_maxX2, g_minX3 - dx2, g_maxX1 + dx2, g_maxX2 + dx2, g_maxX3 + dx2));
@@ -252,9 +254,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
@@ -284,9 +286,9 @@ void run(string configname)
             intHelper.setBC();
 
             // initialization of distributions
-            LBMReal x1c = 2.5 * D; // (g_maxX1 - g_minX1-1)/2; //
-            LBMReal x2c = 12.5 * D; //(g_maxX2 - g_minX2-1)/2;
-            LBMReal x3c = 1.5; //2.5 * D; //(g_maxX3 - g_minX3-1)/2;
+            real x1c = 2.5 * D; // (g_maxX1 - g_minX1-1)/2; //
+            real x2c = 12.5 * D; //(g_maxX2 - g_minX2-1)/2;
+            real x3c = 1.5; //2.5 * D; //(g_maxX3 - g_minX3-1)/2;
             //LBMReal x3c = 2.5 * D;
             mu::Parser fct1;
             fct1.SetExpr("0.5-0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
@@ -355,7 +357,7 @@ void run(string configname)
         grid->accept(setConnsVisitor);
 
         SPtr<UbScheduler> visSch(new UbScheduler(outTime));
-        double t_ast, t;
+        real t_ast, t;
         t_ast = 2;
         t = (int)(t_ast/std::sqrt(g_y/D));
         visSch->addSchedule(t,t,t); //t=2
diff --git a/apps/cpu/Nozzle/nozzle.cpp b/apps/cpu/Nozzle/nozzle.cpp
index 9e8268cafd2bd87fac4fe4cebe11a2d2aafbf534..54a306729006a60ec02c04cf029f529163acbe0f 100644
--- a/apps/cpu/Nozzle/nozzle.cpp
+++ b/apps/cpu/Nozzle/nozzle.cpp
@@ -320,7 +320,7 @@ int main(int argc, char *argv[])
     //    UbLog::output_policy::setStream(logFilename.str());
     //}
 
-    SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+    SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, vf::lbm::dir::DIR_MMM, MetisPartitioner::RECURSIVE));
     
     SPtr<GbObject3D> gridCube = make_shared <GbCuboid3D>(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3);
     if (myid == 0)
diff --git a/apps/cpu/PoiseuilleFlow/pf1.cpp b/apps/cpu/PoiseuilleFlow/pf1.cpp
index d4d856d51f66a1ac6800e1f2f78da5b219b54488..93680117551c13a23ecc08c5dc4731d92ec78b77 100644
--- a/apps/cpu/PoiseuilleFlow/pf1.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf1.cpp
@@ -7,6 +7,8 @@ using namespace std;
 //pipe flow with forcing
 void pf1()
 {
+    using namespace vf::lbm::dir;
+
    SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
 
@@ -14,25 +16,25 @@ void pf1()
    string          pathOut = "d:/temp/test_dir_naming";  //"/gfs1/work/niikonst/pflow_pipe_forcing";
    int             numOfThreads = 1;
    int             blocknx[3] ={ 10,10,10 };
-   double          endTime = 10;
-   double          cpStart = 10;
-   double          cpStep = 10;
-   double          outTime = 10;
-   double          availMem = 8e9;
-   double          deltax = 1;
-   double          rhoLB = 0.0;
-   double          nuLB = 0.005;
+   real          endTime = 10;
+   real          cpStart = 10;
+   real          cpStep = 10;
+   real          outTime = 10;
+   real          availMem = 8e9;
+   real          deltax = 1;
+   real          rhoLB = 0.0;
+   real          nuLB = 0.005;
 
    //geometry definition
 
    //simulation bounding box
-   double g_minX1 = 0.0;
-   double g_minX2 = -10.0;
-   double g_minX3 = -10.0;
+   real g_minX1 = 0.0;
+   real g_minX2 = -10.0;
+   real g_minX3 = -10.0;
 
-   double g_maxX1 = 50;
-   double g_maxX2 = 10;
-   double g_maxX3 = 10;
+   real g_maxX1 = 50;
+   real g_maxX2 = 10;
+   real g_maxX3 = 10;
 
    //Sleep(15000);
 
@@ -76,7 +78,7 @@ void pf1()
 
    //set boundary conditions for blocks and create process decomposition for MPI
    SPtr<D3Q27Interactor> cylinderInt(new D3Q27Interactor(cylinder, grid, noSlipBCAdapter, Interactor3D::INVERSESOLID));
-   SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+   SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
    InteractorsHelper intHelper(grid, metisVisitor);
    intHelper.addInteractor(cylinderInt);
    intHelper.selectBlocks();
@@ -91,8 +93,8 @@ void pf1()
    unsigned long long numberOfNodesPerBlock = (unsigned long long)(blocknx[0])* (unsigned long long)(blocknx[1])* (unsigned long long)(blocknx[2]);
    unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
    unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-   double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-   double needMem = needMemAll / double(comm->getNumberOfProcesses());
+   real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+   real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
    if (myid == 0)
    {
diff --git a/apps/cpu/RisingBubble2D/RisingBubble2D.cpp b/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
index 19677aac71768b7cc0a5acba13e06c22dd6f4658..92495242f11659b3a90b30f4df836642dee5aea8 100644
--- a/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
+++ b/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
@@ -13,6 +13,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
         vf::basics::ConfigurationFile  config;
         config.load(configname);
@@ -20,31 +22,31 @@ void run(string configname)
         string pathname            = config.getValue<string>("pathname");
         int numOfThreads           = config.getValue<int>("numOfThreads");
         vector<int> blocknx        = config.getVector<int>("blocknx");
-        vector<double> boundingBox = config.getVector<double>("boundingBox");
-        double uLB             = config.getValue<double>("uLB");
-        double nuL             = config.getValue<double>("nuL");
-        double nuG             = config.getValue<double>("nuG");
-        double densityRatio    = config.getValue<double>("densityRatio");
+        vector<real> boundingBox = config.getVector<real>("boundingBox");
+        real uLB             = config.getValue<real>("uLB");
+        real nuL             = config.getValue<real>("nuL");
+        real nuG             = config.getValue<real>("nuG");
+        real densityRatio    = config.getValue<real>("densityRatio");
         //double sigma           = config.getValue<double>("sigma");
         int interfaceThickness = config.getValue<int>("interfaceThickness");
-        double radius          = config.getValue<double>("radius");
-        double theta           = config.getValue<double>("contactAngle");
-        double phiL            = config.getValue<double>("phi_L");
-        double phiH            = config.getValue<double>("phi_H");
-        double tauH            = config.getValue<double>("Phase-field Relaxation");
-        double mob             = config.getValue<double>("Mobility");
-
-        double endTime     = config.getValue<double>("endTime");
-        double outTime     = config.getValue<double>("outTime");
-        double availMem    = config.getValue<double>("availMem");
+        real radius          = config.getValue<real>("radius");
+        real theta           = config.getValue<real>("contactAngle");
+        real phiL            = config.getValue<real>("phi_L");
+        real phiH            = config.getValue<real>("phi_H");
+        real tauH            = config.getValue<real>("Phase-field Relaxation");
+        real mob             = config.getValue<real>("Mobility");
+
+        real endTime     = config.getValue<real>("endTime");
+        real outTime     = config.getValue<real>("outTime");
+        real availMem    = config.getValue<real>("availMem");
         int refineLevel    = config.getValue<int>("refineLevel");
-        double Re          = config.getValue<double>("Re");
-        double Eo          = config.getValue<double>("Eo");
-        double dx          = config.getValue<double>("dx");
+        real Re          = config.getValue<real>("Re");
+        real Eo          = config.getValue<real>("Eo");
+        real dx          = config.getValue<real>("dx");
         bool logToFile     = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart     = config.getValue<double>("cpStart");
-        double cpStep      = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart     = config.getValue<real>("cpStart");
+        real cpStep      = config.getValue<real>("cpStep");
         bool newStart      = config.getValue<bool>("newStart");
         //double rStep = config.getValue<double>("rStep");
 
@@ -88,37 +90,37 @@ void run(string configname)
         //Sleep(20000);
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
-        LBMReal nuLB  = nuL; //(uLB*dLB) / Re;
+        real rhoLB = 0.0;
+        real nuLB  = nuL; //(uLB*dLB) / Re;
 
         //diameter of circular droplet
-        LBMReal D  = 2.0*radius;
+        real D  = 2.0*radius;
 
         //density retio
         //LBMReal r_rho = densityRatio;
 
         //density of heavy fluid
-        LBMReal rho_h = 1.0;
+        real rho_h = 1.0;
         //density of light fluid
         //LBMReal rho_l = rho_h / r_rho;
 
         //kinimatic viscosity
-        LBMReal nu_h = nuL;
+        real nu_h = nuL;
         //LBMReal nu_l = nuG;
         //#dynamic viscosity
         //LBMReal mu_h = rho_h * nu_h;
         
         //gravity
-        LBMReal g_y = Re * Re * nu_h * nu_h / (D*D*D);
+        real g_y = Re * Re * nu_h * nu_h / (D*D*D);
         //Eotvos number
         //LBMReal Eo = 100;
         //surface tension
-        LBMReal sigma = rho_h * g_y * D * D / Eo;
+        real sigma = rho_h * g_y * D * D / Eo;
 
         //g_y = 0;
 
-        double beta  = 12.0 * sigma / interfaceThickness;
-        double kappa = 1.5 * interfaceThickness * sigma;
+        real beta  = 12.0 * sigma / interfaceThickness;
+        real kappa = 1.5 * interfaceThickness * sigma;
 
         if (myid == 0) {
                 //UBLOG(logINFO, "uLb = " << uLB);
@@ -189,7 +191,7 @@ void run(string configname)
         grid->setPeriodicX3(true);
         grid->setGhostLayerWidth(2);
 
-        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -208,13 +210,13 @@ void run(string configname)
         if (newStart) {
 
             // bounding box
-            double g_minX1 = boundingBox[0];
-            double g_minX2 = boundingBox[2];
-            double g_minX3 = boundingBox[4];
+            real g_minX1 = boundingBox[0];
+            real g_minX2 = boundingBox[2];
+            real g_minX3 = boundingBox[4];
 
-            double g_maxX1 = boundingBox[1];
-            double g_maxX2 = boundingBox[3];
-            double g_maxX3 = boundingBox[5];
+            real g_maxX1 = boundingBox[1];
+            real g_maxX2 = boundingBox[3];
+            real g_maxX3 = boundingBox[5];
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -227,7 +229,7 @@ void run(string configname)
             GenBlocksGridVisitor genBlocks(gridCube);
             grid->accept(genBlocks);
 
-            double dx2 = 2.0 * dx;
+            real dx2 = 2.0 * dx;
             GbCuboid3DPtr wallXmin(new GbCuboid3D(g_minX1 - dx2, g_minX2 - dx2, g_minX3 - dx2, g_minX1, g_maxX2 + dx2, g_maxX3 + dx2));
             GbSystem3D::writeGeoObject(wallXmin.get(), pathname + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
             GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - dx2, g_minX3 - dx2, g_maxX1 + dx2, g_maxX2 + dx2, g_maxX3 + dx2));
@@ -264,9 +266,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
@@ -296,9 +298,9 @@ void run(string configname)
             intHelper.setBC();
 
             // initialization of distributions
-            LBMReal x1c = D; 
-            LBMReal x2c = D; 
-            LBMReal x3c = 1.5; 
+            real x1c = D; 
+            real x2c = D; 
+            real x3c = 1.5; 
             //LBMReal x3c = 2.5 * D;
             mu::Parser fct1;
             fct1.SetExpr("0.5+0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
diff --git a/apps/cpu/ViskomatXL/viskomat.cpp b/apps/cpu/ViskomatXL/viskomat.cpp
index 5256bec7fb7e1b3f67bb48597806c986e60b9698..be1f8bab3f99f5577e5a9ca0b426572a87c5a6af 100644
--- a/apps/cpu/ViskomatXL/viskomat.cpp
+++ b/apps/cpu/ViskomatXL/viskomat.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -18,21 +20,21 @@ void bflow(string configname)
       string          geoFile = config.getValue<string>("geoFile");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          cpStart = config.getValue<double>("cpStart");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          deltax = config.getValue<real>("deltax");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          cpStart = config.getValue<real>("cpStart");
       bool            newStart = config.getValue<bool>("newStart");
-      double          OmegaLB = config.getValue<double>("OmegaLB");
-      double          tau0 = config.getValue<double>("tau0");
-      double          N = config.getValue<double>("N");
-      double          mu = config.getValue<double>("mu");
+      real          OmegaLB = config.getValue<real>("OmegaLB");
+      real          tau0 = config.getValue<real>("tau0");
+      real          N = config.getValue<real>("N");
+      real          mu = config.getValue<real>("mu");
 
 
       vf::basics::ConfigurationFile   viscosity;
@@ -58,22 +60,22 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       //double N  = 70; //rpm
-      double Omega = 2 * UbMath::PI / 60.0 * N; //rad/s
+      real Omega = 2 * UbMath::PI / 60.0 * N; //rad/s
       //double mu    = 5; //Pa s
-      double R     = 0.165 / 2.0; //m
-      double rho = 2150;//  970; //kg/m^3
-      double Re    = Omega * R * R * rho / mu;
+      real R     = 0.165 / 2.0; //m
+      real rho = 2150;// 970; //kg/m^3
+      real Re    = Omega * R * R * rho / mu;
 
       //double nuLB = OmegaLB * R * 1e3 * R * 1e3 / Re;
 
-      double dx = deltax * 1e-3;
-      double nuLB = OmegaLB * (R / dx)*(R / dx) / Re;
+      real dx = deltax * 1e-3;
+      real nuLB = OmegaLB * (R / dx)*(R / dx) / Re;
 
-      double Bm = tau0/(mu*Omega);
-      double tau0LB = Bm*nuLB*OmegaLB;
+      real Bm = tau0/(mu*Omega);
+      real tau0LB = Bm*nuLB*OmegaLB;
 
 
       //double dx = 1.0 * 1e-3;
@@ -89,14 +91,14 @@ void bflow(string configname)
 
       //bounding box
 
-      double g_minX1 = boundingBox[0];
-      double g_maxX1 = boundingBox[1];
+      real g_minX1 = boundingBox[0];
+      real g_maxX1 = boundingBox[1];
 
-      double g_minX2 = boundingBox[2];
-      double g_maxX2 = boundingBox[3];
+      real g_minX2 = boundingBox[2];
+      real g_maxX2 = boundingBox[3];
       
-      double g_minX3 = boundingBox[4];
-      double g_maxX3 = boundingBox[5];
+      real g_minX3 = boundingBox[4];
+      real g_maxX3 = boundingBox[5];
 
       SPtr<Rheology> thix = Rheology::getInstance();
       //thix->setPowerIndex(n);
@@ -191,7 +193,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
       ////////////////////////////////////////////
       //////////////////////////////////////////////////////////////////////////
       //restart
@@ -255,7 +257,7 @@ void bflow(string configname)
           g_maxX2 + deltax, g_maxX3 + deltax));
       if (myid == 0) GbSystem3D::writeGeoObject(wallXmin.get(), outputPath + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
 
-      GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - deltax, g_minX3 - deltax, g_maxX1 +  (double)blocknx[0]*deltax,
+      GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - deltax, g_minX3 - deltax, g_maxX1 +  (real)blocknx[0]*deltax,
           g_maxX2 + deltax, g_maxX3 + deltax));
       if (myid == 0) GbSystem3D::writeGeoObject(wallXmax.get(), outputPath + "/geo/wallXmax", WbWriterVtkXmlASCII::getInstance());
 
@@ -326,8 +328,8 @@ void bflow(string configname)
          unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-         double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
diff --git a/apps/cpu/rheometer/rheometer.cpp b/apps/cpu/rheometer/rheometer.cpp
index e79d9d13a3763cb5502c24f12fb6ec4d27651814..1506bde80f21bce580c6aa781d1075fdc86bcd62 100644
--- a/apps/cpu/rheometer/rheometer.cpp
+++ b/apps/cpu/rheometer/rheometer.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -19,24 +21,24 @@ void bflow(string configname)
       vector<int>     blocknx = config.getVector<int>("blocknx");
       //vector<double>  boundingBox = config.getVector<double>("boundingBox");
       //double          nuLB = 1.5e-3;//config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          cpStart = config.getValue<double>("cpStart");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          deltax = config.getValue<real>("deltax");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          cpStart = config.getValue<real>("cpStart");
       bool            newStart = config.getValue<bool>("newStart");
-      double          OmegaLB = config.getValue<double>("OmegaLB");
-      double          tau0 = config.getValue<double>("tau0");
-      double          scaleFactor = config.getValue<double>("scaleFactor");
-      double          resolution = config.getValue<double>("resolution");
+      real          OmegaLB = config.getValue<real>("OmegaLB");
+      real          tau0 = config.getValue<real>("tau0");
+      real          scaleFactor = config.getValue<real>("scaleFactor");
+      real          resolution = config.getValue<real>("resolution");
 
       vf::basics::ConfigurationFile   viscosity;
       viscosity.load(viscosityPath + "/viscosity.cfg");
-      double nuLB = viscosity.getValue<double>("nuLB");
+      real nuLB = viscosity.getValue<real>("nuLB");
 
       //outputPath = outputPath + "/rheometerBingham_" + config.getValue<string>("resolution") + "_" + config.getValue<string>("OmegaLB");
 
@@ -61,7 +63,7 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       //akoustic
        OmegaLB /= scaleFactor;
@@ -97,13 +99,13 @@ void bflow(string configname)
       //double g_maxX2 = resolution;// boundingBox[1];
       //double g_maxX3 = 1.0; // boundingBox[2];
 
-      double g_minX1 = 0;
-      double g_minX2 = 0;
-      double g_minX3 = 0;
+      real g_minX1 = 0;
+      real g_minX2 = 0;
+      real g_minX3 = 0;
 
-      double g_maxX1 = resolution; // boundingBox[0];
-      double g_maxX2 = resolution; // boundingBox[1];
-      double g_maxX3 = 1.0; // boundingBox[2];
+      real g_maxX1 = resolution; // boundingBox[0];
+      real g_maxX2 = resolution; // boundingBox[1];
+      real g_maxX3 = 1.0; // boundingBox[2];
 
       //double g_minX1 = -boundingBox[0]/2.0;
       //double g_minX2 = -boundingBox[1] / 2.0;
@@ -223,7 +225,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
       ////////////////////////////////////////////
       //////////////////////////////////////////////////////////////////////////
       //restart
@@ -313,7 +315,7 @@ void bflow(string configname)
 
          ////////////////////////////////////////////
          //METIS
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
          ////////////////////////////////////////////
          /////delete solid blocks
          if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -334,8 +336,8 @@ void bflow(string configname)
          unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-         double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
diff --git a/apps/cpu/sphere/sphere.cpp b/apps/cpu/sphere/sphere.cpp
index bad77ee999d96b89fea43183929ecf4a3b920588..70dcc0ddd189f3906575e92877800ec709199a78 100644
--- a/apps/cpu/sphere/sphere.cpp
+++ b/apps/cpu/sphere/sphere.cpp
@@ -7,6 +7,8 @@ using namespace std;
 ////////////////////////////////////////////////////////////////////////
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
@@ -25,20 +27,20 @@ void run(string configname)
       //const int refineLevel = config.getValue<int>("level");
 
       string outputPath = "d:/temp/sphereBlock_5_SBB";
-      double availMem = 8e9;
-      double outstep = 10000;
-      double endstep = 1e6;
+      real availMem = 8e9;
+      real outstep = 10000;
+      real endstep = 1e6;
       int numOfThreads = 4;
       omp_set_num_threads(numOfThreads);
       int refineLevel = 0;
 
-      LBMReal radius = 5;
-      LBMReal uLB = 1e-3;
-      LBMReal Re = 1;
-      LBMReal rhoLB = 0.0;
-      LBMReal nuLB = (uLB*2.0*radius)/Re;
+      real radius = 5;
+      real uLB = 1e-3;
+      real Re = 1;
+      real rhoLB = 0.0;
+      real nuLB = (uLB*2.0*radius)/Re;
 
-      double dp_LB = 1e-6;
+      real dp_LB = 1e-6;
 //      double rhoLBinflow = dp_LB*3.0;
 
       SPtr<BCAdapter> noSlipBCAdapter(new NoSlipBCAdapter());
@@ -46,7 +48,7 @@ void run(string configname)
       SPtr<BCAdapter> slipBCAdapter(new SlipBCAdapter());
       slipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new SimpleSlipBCAlgorithm()));
       
-      double H = 50;
+      real H = 50;
       mu::Parser fct;
       fct.SetExpr("U");
       fct.DefineConst("U", uLB);
@@ -67,7 +69,7 @@ void run(string configname)
       bcVisitor.addBC(velBCAdapter);
       bcVisitor.addBC(denBCAdapter);
 
-      double dx = 1;
+      real dx = 1;
 
       const int blocknx1 = 50;
       const int blocknx2 = 50;
@@ -77,7 +79,7 @@ void run(string configname)
       const int gridNx2 = H;
       const int gridNx3 = H;
 
-      double L1, L2, L3;
+      real L1, L2, L3;
       L1 = gridNx1;
       L2 = gridNx2;
       L3 = gridNx3;
@@ -97,15 +99,15 @@ void run(string configname)
       if (true)
       {
          //bounding box
-         double d_minX1 = 0.0;
-         double d_minX2 = 0.0;
-         double d_minX3 = 0.0;
+         real d_minX1 = 0.0;
+         real d_minX2 = 0.0;
+         real d_minX3 = 0.0;
 
-         double d_maxX1 = L1;
-         double d_maxX2 = L2;
-         double d_maxX3 = L3;
+         real d_maxX1 = L1;
+         real d_maxX2 = L2;
+         real d_maxX3 = L3;
 
-         double blockLength = blocknx1*dx;
+         real blockLength = blocknx1*dx;
 
          if (myid == 0)
          {
@@ -126,7 +128,7 @@ void run(string configname)
          GenBlocksGridVisitor genBlocks(gridCube);
          grid->accept(genBlocks);
 
-         double off = 0.0;
+         real off = 0.0;
          SPtr<GbObject3D> refCube(new GbCuboid3D(sphere->getX1Minimum() - off, sphere->getX2Minimum() - off, sphere->getX3Minimum(),
             sphere->getX1Maximum() + off, sphere->getX2Maximum() + off, sphere->getX3Maximum()));
          if (myid == 0) GbSystem3D::writeGeoObject(refCube.get(), outputPath + "/geo/refCube", WbWriterVtkXmlBinary::getInstance());
@@ -180,7 +182,7 @@ void run(string configname)
          //outflow
          SPtr<D3Q27Interactor> outflowInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflow, grid, denBCAdapter, Interactor3D::SOLID));
 
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
          InteractorsHelper intHelper(grid, metisVisitor);
          intHelper.addInteractor(sphereInt);
          intHelper.addInteractor(addWallYminInt);
@@ -202,8 +204,8 @@ void run(string configname)
          int gl = 3;
          unsigned long nod = nob * (blocknx1 + gl) * (blocknx2 + gl) * (blocknx3 + gl);
 
-         double needMemAll = double(nod*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nod*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
@@ -285,7 +287,7 @@ void run(string configname)
       SPtr<UbScheduler> nupsSch(new UbScheduler(10, 30, 100));
       SPtr<CoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
 
-      double area = UbMath::PI * radius * radius;
+      real area = UbMath::PI * radius * radius;
       SPtr<UbScheduler> forceSch(new UbScheduler(100));
       SPtr<CalculateForcesCoProcessor> fp = make_shared<CalculateForcesCoProcessor>(grid, forceSch, outputPath + "/forces/forces.txt", comm, uLB, area);
       fp->addInteractor(sphereInt);
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index 58e5aede18b9c4197b4d21b129c6347023b9390e..9d982ebac0059b4512041194100f6e1fdfa61924 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -1,4 +1,35 @@
-
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ActuatorLine.cpp
+//! \ingroup ActuatorLine
+//! \author Henry Korb, Henrik Asmuth
+//=======================================================================================
 #define _USE_MATH_DEFINES
 #include <math.h>
 #include <string>
@@ -28,12 +59,14 @@
 #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
 #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
 #include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+
 #include "GridGenerator/grid/GridFactory.h"
 
 #include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
 #include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
-#include "GridGenerator/io/STLReaderWriter/STLReader.h"
-#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+#include "GridGenerator/TransientBCSetter/TransientBCSetter.h"
+
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -44,10 +77,12 @@
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
-#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h"
+#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -63,26 +98,10 @@
 
 LbmOrGks lbmOrGks = LBM;
 
-const real reference_diameter = 126.0; // diameter in m
-
-const real L_x = 10*reference_diameter;
-const real L_y = 6*reference_diameter;
-const real L_z = 6*reference_diameter;
-
-const real viscosity = 1.56e-5;
-
-const real velocity  = 9.0;
-
-const real mach = 0.1;
-
-const uint nodes_per_diameter = 16;
-
 std::string path(".");
 
 std::string simulationName("ActuatorLine");
 
-const float tOut = 100;
-const float tEnd = 280; // total time of simulation in s
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -98,30 +117,59 @@ void multipleLevel(const std::string& configPath)
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     auto gridFactory = GridFactory::make();
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    vf::basics::ConfigurationFile config;
+    config.load(configPath);
+
+    const real reference_diameter = config.getValue<real>("ReferenceDiameter");
+    const uint nodes_per_diameter = config.getValue<uint>("NodesPerDiameter");
+    const real velocity = config.getValue<real>("Velocity");
+
+
+    const real L_x = 24*reference_diameter;
+    const real L_y = 6*reference_diameter;
+    const real L_z = 6*reference_diameter;
+
+    const real viscosity = 1.56e-5;
+
+    const real mach = 0.1;
+
+
+    const float tStartOut   = config.getValue<real>("tStartOut");
+    const float tOut        = config.getValue<real>("tOut");
+    const float tEnd        = config.getValue<real>("tEnd"); // total time of simulation
+
+    const float tStartAveraging     =  config.getValue<real>("tStartAveraging");
+    const float tStartTmpAveraging  =  config.getValue<real>("tStartTmpAveraging");
+    const float tAveraging          =  config.getValue<real>("tAveraging");
+    const float tStartOutProbe      =  config.getValue<real>("tStartOutProbe");
+    const float tOutProbe           =  config.getValue<real>("tOutProbe");
+        
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
+    GridScalingFactory scalingFactory  = GridScalingFactory();
+
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 	const real dx = reference_diameter/real(nodes_per_diameter);
 
+    real turbPos[3] = {3*reference_diameter, 3*reference_diameter, 3*reference_diameter};
+
 	gridBuilder->addCoarseGrid(0.0, 0.0, 0.0,
 							   L_x,  L_y,  L_z, dx);
 
+    gridBuilder->setNumberOfLayers(4,0);
+    gridBuilder->addGrid( new Cuboid(   turbPos[0]-1.5*reference_diameter,  turbPos[1]-1.5*reference_diameter,  turbPos[2]-1.5*reference_diameter, 
+                                        turbPos[0]+10.0*reference_diameter, turbPos[1]+1.5*reference_diameter,  turbPos[2]+1.5*reference_diameter) , 1 );
+    para->setMaxLevel(2);
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+
 	gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
 	gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!!
 
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    vf::basics::ConfigurationFile config;
-    config.load(configPath);
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
-    BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     const real dt = dx * mach / (sqrt(3) * velocity);
 
@@ -140,14 +188,11 @@ void multipleLevel(const std::string& configPath)
 
     para->setPrintFiles(true);
 
-    para->setMaxLevel(1);
-
-
     para->setVelocityLB(velocityLB);
     para->setViscosityLB(viscosityLB);
     para->setVelocityRatio( dx / dt );
     para->setViscosityRatio( dx*dx/dt );
-    para->setMainKernel("CumulantK17CompChim");
+    para->setMainKernel("CumulantK17");
 
     para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
         rho = (real)0.0;
@@ -156,13 +201,15 @@ void multipleLevel(const std::string& configPath)
         vz  = (real)0.0;
     });
 
+    para->setTimestepStartOut( uint(tStartOut/dt) );
     para->setTimestepOut( uint(tOut/dt) );
     para->setTimestepEnd( uint(tEnd/dt) );
 
     para->setIsBodyForce( true );
-
+    para->setUseStreams( true );
 
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
     gridBuilder->setVelocityBoundaryCondition(SideType::MX,  velocityLB,  0.0, 0.0);
 
     gridBuilder->setVelocityBoundaryCondition(SideType::MY,  velocityLB,  0.0, 0.0);
@@ -172,42 +219,52 @@ void multipleLevel(const std::string& configPath)
     gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
 
     bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible);
-    bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
+    bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
+
+    SPtr<TurbulenceModelFactory> tmFactory = std::make_shared<TurbulenceModelFactory>(para);
+    tmFactory->readConfigFile(config);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    real turbPos[3] = {3*reference_diameter, 3*reference_diameter, 3*reference_diameter};
-    real epsilon = 5.f; // width of gaussian smearing
-    real density = 1.225f;
-    int level = 0;
-    uint nBlades = 3;
-    uint nBladeNodes = 32;
+    int level = 1; // grid level at which the turbine samples velocities and distributes forces
+    const real epsilon = dx*exp2(-level)*1.5; // width of gaussian smearing
+    const real density = 1.225f;
+    const uint nBlades = 3;
+    const uint nBladeNodes = 32;
+    const real tipspeed_ratio = 7.5f; // tipspeed ratio = angular vel * radius / inflow vel
+    const real omega = 2*tipspeed_ratio*velocity/reference_diameter;
+    
+
+    SPtr<ActuatorFarm> actuator_farm = std::make_shared<ActuatorFarm>(nBlades, density, nBladeNodes, epsilon, level, dt, dx, true);
+    std::vector<real> bladeRadii;
+    real dr = reference_diameter/(nBladeNodes*2);
+    for(uint node=0; node<nBladeNodes; node++){ bladeRadii.emplace_back(dr*(node+1)); }
+    actuator_farm->addTurbine(turbPos[0], turbPos[1], turbPos[2], reference_diameter, omega, 0, 0, bladeRadii);
+    para->addActuator( actuator_farm );
 
-    SPtr<ActuatorLine> actuator_line =SPtr<ActuatorLine>( new ActuatorLine(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx) );
-    para->addActuator( actuator_line );
 
-    SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", para->getOutputPath(), 100, 1, 500, 100) );
-    std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
-    std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
-    std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
-    pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
-    // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
+    // SPtr<PointProbe> pointProbe = std::make_shared<PointProbe>("pointProbe", para->getOutputPath(), 100, 1, 500, 100);
+    // std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
+    // std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+    // std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+    // pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
+    // // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
 
-    pointProbe->addStatistic(Statistic::Means);
-    pointProbe->addStatistic(Statistic::Variances);
-    para->addProbe( pointProbe );
+    // pointProbe->addStatistic(Statistic::Means);
+    // pointProbe->addStatistic(Statistic::Variances);
+    // para->addProbe( pointProbe );
 
-    SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", para->getOutputPath(), 100, 500, 100, 100) );
-    planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
-    planeProbe->addStatistic(Statistic::Means);
-    para->addProbe( planeProbe );
+    // SPtr<PlaneProbe> planeProbe = std::make_shared<PlaneProbe>("planeProbe", para->getOutputPath(), 100, 500, 100, 100);
+    // planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
+    // planeProbe->addStatistic(Statistic::Means);
+    // para->addProbe( planeProbe );
 
 
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
 
     auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
 
-    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory, &scalingFactory);
     sim.run();
 }
 
diff --git a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt
index 233994f0d32a48190d84f7044500e24b06b926a9..5799f24716777295b2f835ab00561ff767ba87b9 100644
--- a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt
+++ b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt
@@ -6,3 +6,29 @@ Path = .
 #informations for reading
 ##################################################
 GridPath=.
+##################################################
+ReferenceDiameter=126
+NodesPerDiameter=32
+Velocity=9
+##################################################
+tStartOut=100
+tOut=100
+tEnd=1000
+##################################################
+
+tStartTmpAveraging=100
+tStartAveraging=100
+tAveraging=100
+tTmpAveraging=100
+tStartOutProbe=100
+tOutProbe=100
+
+##################################################
+#TurbulenceModel = QR
+#SGSconstant = 0.3333333
+#
+#QuadricLimiterP = 100000.0
+#QuadricLimiterM = 100000.0
+#QuadricLimiterD = 100000.0
+##################################################
+
diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
index 991025b649d69305c030fe2f1dd1763a2137af9b..5fc31904433bfe2df0722ab1c63f574d3fcb9a35 100644
--- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
+++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
@@ -1,4 +1,35 @@
-
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BoundaryLayer.cpp
+//! \ingroup BoundaryLayer
+//! \author Henry Korb, Henrik Asmuth
+//=======================================================================================
 #define _USE_MATH_DEFINES
 #include <math.h>
 #include <string>
@@ -8,6 +39,7 @@
 #include <fstream>
 #include <exception>
 #include <memory>
+#include <numeric>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -19,6 +51,7 @@
 #include "Core/VectorTypes.h"
 
 #include <basics/config/ConfigurationFile.h>
+#include "lbm/constants/NumericConstants.h"
 
 #include <logger/Logger.h>
 
@@ -28,12 +61,16 @@
 #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
 #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
 #include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+
 #include "GridGenerator/grid/GridFactory.h"
 
+#include "geometries/Cuboid/Cuboid.h"
+#include "geometries/TriangularMesh/TriangularMesh.h"
+
 #include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
 #include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
-#include "GridGenerator/io/STLReaderWriter/STLReader.h"
-#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+#include "GridGenerator/TransientBCSetter/TransientBCSetter.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -44,24 +81,28 @@
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
-#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h"
+#include "VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
 #include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
+#include "utilities/communication.h"
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 std::string path(".");
 
-std::string simulationName("BoundayLayer");
+std::string simulationName("BoundaryLayer");
 
+using namespace vf::lbm::constant;
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -87,8 +128,16 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^
     SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-
+    GridScalingFactory scalingFactory  = GridScalingFactory();
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    
+    const int  nProcs = communicator.getNummberOfProcess();
+    const uint procID = vf::gpu::Communicator::getInstance().getPID();
+    std::vector<uint> devices(10);
+    std::iota(devices.begin(), devices.end(), 0);
+    para->setDevices(devices);
+    para->setMaxDev(nProcs);
+    
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //
@@ -100,23 +149,45 @@ void multipleLevel(const std::string& configPath)
 
     LbmOrGks lbmOrGks = LBM;
 
-    const real H = 1000.0; // boundary layer height in m
+    const real H = config.getValue("boundaryLayerHeight", 1000.0); // boundary layer height in m
 
     const real L_x = 6*H;
     const real L_y = 4*H;
-    const real L_z = 1*H;
+    const real L_z = H;
+
+    const real z0  = config.getValue("z0", 0.1f); // roughness length in m
+    const real u_star = config.getValue("u_star", 0.4f); //friction velocity in m/s
+    const real kappa = config.getValue("vonKarmanConstant", 0.4f); // von Karman constant
 
-    const real z0  = 0.1; // roughness length in m
-    const real u_star = 0.4; //friction velocity in m/s
-    const real kappa = 0.4; // von Karman constant
+    const real viscosity = config.getValue("viscosity", 1.56e-5f);
 
-    const real viscosity = 1.56e-5;
+    const real velocity  = 0.5f*u_star/kappa*log(H/z0+1.f); //0.5 times max mean velocity at the top in m/s
 
-    const real velocity  = 0.5*u_star/kappa*log(L_z/z0); //0.5 times max mean velocity at the top in m/s
+    const real mach = config.getValue<real>("Ma", 0.1);
 
-    const real mach = config.contains("Ma")? config.getValue<real>("Ma"): 0.1;
+    const uint nodes_per_H = config.getValue<uint>("nz", 64);
 
-    const uint nodes_per_H = config.contains("nz")? config.getValue<uint>("nz"): 64;
+    const bool writePrecursor = config.getValue("writePrecursor", false);
+    bool useDistributions = false; // same fallback value both precursor branches pass to getValue
+    std::string precursorDirectory;
+    int nTWritePrecursor = 0; real tStartPrecursor = 0, posXPrecursor = 0; // overwritten below only when writePrecursor is set
+    if(writePrecursor)
+    {
+        nTWritePrecursor     = config.getValue<int>("nTimestepsWritePrecursor");
+        tStartPrecursor      = config.getValue<real>("tStartPrecursor");
+        posXPrecursor        = config.getValue<real>("posXPrecursor");
+        useDistributions     = config.getValue<bool>("useDistributions", false);
+        precursorDirectory   = config.getValue<std::string>("precursorDirectory");
+    }
+
+    const bool readPrecursor = config.getValue("readPrecursor", false);
+    int timestepsBetweenReadsPrecursor = 0; // overwritten below only when readPrecursor is set
+    if(readPrecursor)
+    {
+        timestepsBetweenReadsPrecursor = config.getValue<int>("nTimestepsReadPrecursor");
+        precursorDirectory = config.getValue<std::string>("precursorDirectory");
+        useDistributions     = config.getValue<bool>("useDistributions", false);
+    }
 
     // all in s
     const float tStartOut   = config.getValue<real>("tStartOut");
@@ -130,7 +201,7 @@ void multipleLevel(const std::string& configPath)
     const float tOutProbe           =  config.getValue<real>("tOutProbe");
 
 
-    const real dx = L_z/real(nodes_per_H);
+    const real dx = H/real(nodes_per_H);
 
     const real dt = dx * mach / (sqrt(3) * velocity);
 
@@ -155,15 +226,17 @@ void multipleLevel(const std::string& configPath)
 
     para->setPrintFiles(true);
 
-    para->setForcing(pressureGradientLB, 0, 0);
+    if(!readPrecursor) para->setForcing(pressureGradientLB, 0, 0);
     para->setVelocityLB(velocityLB);
     para->setViscosityLB(viscosityLB);
     para->setVelocityRatio( dx / dt );
     para->setViscosityRatio( dx*dx/dt );
     para->setDensityRatio( 1.0 );
 
-    para->setMainKernel("TurbulentViscosityCumulantK17CompChim");
-
+    bool useStreams = (nProcs > 1 ? true: false);
+    // useStreams=false;
+    para->setUseStreams(useStreams);
+    para->setMainKernel("CumulantK17");
     para->setIsBodyForce( config.getValue<bool>("bodyForce") );
 
     para->setTimestepStartOut(uint(tStartOut/dt) );
@@ -172,64 +245,206 @@ void multipleLevel(const std::string& configPath)
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    SPtr<TurbulenceModelFactory> tmFactory = SPtr<TurbulenceModelFactory>( new TurbulenceModelFactory(para) );
+    SPtr<TurbulenceModelFactory> tmFactory = std::make_shared<TurbulenceModelFactory>(para);
     tmFactory->readConfigFile( config );
+    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    const real xSplit = L_x/nProcs;
+    const real overlap = 8.0*dx;
+
+    real xMin      =  procID    * xSplit;
+    real xMax      = (procID+1) * xSplit;
+    real xGridMin  =  procID    * xSplit;
+    real xGridMax  = (procID+1) * xSplit;
+
+    real yMin      = 0.0;
+    real yMax      = L_y;
+    real zMin      = 0.0;
+    real zMax      = L_z; 
+
+    bool isFirstSubDomain = (procID == 0        && nProcs > 1)?                    true: false;
+    bool isLastSubDomain  = (procID == nProcs-1 && nProcs > 1)?                    true: false;
+    bool isMidSubDomain   = (!isFirstSubDomain && !isLastSubDomain && nProcs > 1)? true: false;
     
-    // tmFactory->setTurbulenceModel(TurbulenceModel::AMD);
-    // tmFactory->setModelConstant(config.getValue<real>("SGSconstant"));
+    if(isFirstSubDomain)
+    {
+        xGridMax += overlap;
+        if(!readPrecursor) xGridMin -= overlap;
+    }
+    if(isLastSubDomain)
+    {
+        xGridMin -= overlap;
+        if(!readPrecursor) xGridMax += overlap;
+    }
+    if(isMidSubDomain)
+    {
+        xGridMax += overlap;
+        xGridMin -= overlap;
+    }
 
-    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    gridBuilder->addCoarseGrid( xGridMin,  0.0,  0.0,
+                                xGridMax,  L_y,  L_z, dx);
+    if(true)// Add refinement
+    {
+        gridBuilder->setNumberOfLayers(4,0);
+        real xMaxRefinement = readPrecursor? xGridMax-H: xGridMax;   //Stop refinement some distance before outlet if domain is not periodic
+        gridBuilder->addGrid( new Cuboid( xGridMin, 0.f, 0.f, xMaxRefinement, L_y,  0.5*L_z) , 1 );
+        para->setMaxLevel(2);
+        scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+    }
+
+    if(nProcs > 1)
+    {
+            gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xMin, xMax, yMin, yMax, zMin, zMax));        
+            gridBuilder->setPeriodicBoundaryCondition(false, true, false);
+    }
+    else         
+    { 
+        gridBuilder->setPeriodicBoundaryCondition(!readPrecursor, true, false);
+    }
 
-    gridBuilder->addCoarseGrid(0.0, 0.0, 0.0,
-                                L_x,  L_y,  L_z, dx);
-    // gridBuilder->setNumberOfLayers(12, 8);
+	gridBuilder->buildGrids(lbmOrGks, true); // buildGrids() has to be called before setting the BCs!!!!
 
-    // gridBuilder->addGrid( new Cuboid( 0.0, 0.0, 0.0, L_x,  L_y,  0.3*L_z) , 1 );
-    // para->setMaxLevel(2);
+    std::cout << "nProcs: "<< nProcs << "Proc: " << procID << " isFirstSubDomain: " << isFirstSubDomain << " isLastSubDomain: " << isLastSubDomain << " isMidSubDomain: " << isMidSubDomain << std::endl;
+    
+    if(nProcs > 1){
+        if (isFirstSubDomain || isMidSubDomain) {
+            gridBuilder->findCommunicationIndices(CommunicationDirections::PX, lbmOrGks);
+            gridBuilder->setCommunicationProcess(CommunicationDirections::PX, procID+1);
+        }
 
-    gridBuilder->setPeriodicBoundaryCondition(true, true, false);
+        if (isLastSubDomain || isMidSubDomain) {
+            gridBuilder->findCommunicationIndices(CommunicationDirections::MX, lbmOrGks);
+            gridBuilder->setCommunicationProcess(CommunicationDirections::MX, procID-1);
+        }
 
-	gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!!
+        if (isFirstSubDomain && !readPrecursor) {
+            gridBuilder->findCommunicationIndices(CommunicationDirections::MX, lbmOrGks);
+            gridBuilder->setCommunicationProcess(CommunicationDirections::MX, nProcs-1);
+        }
 
+        if (isLastSubDomain && !readPrecursor) {
+            gridBuilder->findCommunicationIndices(CommunicationDirections::PX, lbmOrGks);
+            gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 0);
+        }
+    }
     uint samplingOffset = 2;
-    // gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
+    
+    std::cout << " precursorDirectory " << precursorDirectory << std::endl;
+    
+    if(readPrecursor)
+    {
+        if(isFirstSubDomain || nProcs == 1)
+        {   
+            auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK);
+            gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, timestepsBetweenReadsPrecursor);
+            // gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0);
+        }
+
+        if(isLastSubDomain || nProcs == 1)
+        {
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.f);
+        }     
+    } 
+
     gridBuilder->setStressBoundaryCondition(SideType::MZ,
                                             0.0, 0.0, 1.0,              // wall normals
-                                            samplingOffset, z0/dx);     // wall model settinng
-    para->setHasWallModelMonitor(true);
-    bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack);
+                                            samplingOffset, z0, dx);     // wall model settings
+    para->setHasWallModelMonitor(true);   
+    gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0f,  0.0f, -1.0f); 
 
-    gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0,  0.0, 0.0);
+    bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
+    bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack);
     bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); 
-    
+    bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
+    bcFactory.setPrecursorBoundaryCondition(useDistributions ? BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor : BoundaryConditionFactory::PrecursorBC::VelocityPrecursor);
+    para->setOutflowPressureCorrectionFactor(0.0); 
 
-    real cPi = 3.1415926535897932384626433832795;
-    para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
+    if(readPrecursor)
+    {
+        para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
         rho = (real)0.0;
-        vx  = (u_star/0.4 * log(coordZ/z0) + 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1))  * dt / dx; 
-        vy  = 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)  * dt / dx; 
-        vz  = 8.0*u_star/0.4*(sin(cPi*8.0*coordY/H)*sin(cPi*8.0*coordZ/H)+sin(cPi*8.0*coordX/L_x))/(pow(L_z/2.0-coordZ, c2o1)+c1o1) * dt / dx;
-    });
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        vx  = rho = c0o1;
+        vx  = u_star/c4o10 * log(coordZ/z0+c1o1) * dt/dx; // log-law mean profile in LB units; a duplicated u_star/c4o10 factor (velocity squared) was removed
+        vy  = c0o1; 
+        vz  = c0o1;
+        });
+    }
+    else
+    {
+        para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
+        rho = (real)0.0;
+        vx  = rho = c0o1;
+        vx  = (u_star/c4o10 * log(coordZ/z0+c1o1) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) * dt/dx; 
+        vy  = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) * dt/dx; 
+        vz  = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/H)*sin(cPi*c8o1*coordZ/H)+sin(cPi*c8o1*coordX/L_x))/(pow(c1o2*L_z-coordZ, c2o1)+c1o1) * dt/dx;
+        });
+    }
+
+
 
-    SPtr<PlanarAverageProbe> planarAverageProbe = SPtr<PlanarAverageProbe>( new PlanarAverageProbe("planeProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt , tStartOutProbe/dt, tOutProbe/dt, 'z') );
-    planarAverageProbe->addAllAvailableStatistics();
-    planarAverageProbe->setFileNameToNOut();
-    para->addProbe( planarAverageProbe );
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    if(!readPrecursor && (isFirstSubDomain || nProcs == 1))
+    {
+        SPtr<PlanarAverageProbe> planarAverageProbe = SPtr<PlanarAverageProbe>( new PlanarAverageProbe("planeProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt , tStartOutProbe/dt, tOutProbe/dt, 'z') );
+        planarAverageProbe->addAllAvailableStatistics();
+        planarAverageProbe->setFileNameToNOut();
+        para->addProbe( planarAverageProbe );
+
+        para->setHasWallModelMonitor(true);
+        SPtr<WallModelProbe> wallModelProbe = SPtr<WallModelProbe>( new WallModelProbe("wallModelProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt/4.0 , tStartOutProbe/dt, tOutProbe/dt) );
+        wallModelProbe->addAllAvailableStatistics();
+        wallModelProbe->setFileNameToNOut();
+        wallModelProbe->setForceOutputToStress(true);
+        if(para->getIsBodyForce())
+            wallModelProbe->setEvaluatePressureGradient(true);
+        para->addProbe( wallModelProbe );
+    }
+
+    SPtr<PlaneProbe> planeProbe1 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_1", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+    planeProbe1->setProbePlane(100.0, 0.0, 0, dx, L_y, L_z);
+    planeProbe1->addAllAvailableStatistics();
+    para->addProbe( planeProbe1 );
+
+    if(readPrecursor)
+    {
+        SPtr<PlaneProbe> planeProbe2 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_2", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe2->setProbePlane(1000.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe2->addAllAvailableStatistics();
+        para->addProbe( planeProbe2 );
+
+        SPtr<PlaneProbe> planeProbe3 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_3", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe3->setProbePlane(1500.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe3->addAllAvailableStatistics();
+        para->addProbe( planeProbe3 );
+
+        SPtr<PlaneProbe> planeProbe4 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_4", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe4->setProbePlane(2000.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe4->addAllAvailableStatistics();
+        para->addProbe( planeProbe4 );
+
+        SPtr<PlaneProbe> planeProbe5 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_5", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe5->setProbePlane(2500.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe5->addAllAvailableStatistics();
+        para->addProbe( planeProbe5 );
+
+        SPtr<PlaneProbe> planeProbe6 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_6", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe6->setProbePlane(0.0, L_y/2.0, 0, L_x, dx, L_z);
+        planeProbe6->addAllAvailableStatistics();
+        para->addProbe( planeProbe6 );
+    }
 
-    para->setHasWallModelMonitor(true);
-    SPtr<WallModelProbe> wallModelProbe = SPtr<WallModelProbe>( new WallModelProbe("wallModelProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt/4.0 , tStartOutProbe/dt, tOutProbe/dt) );
-    wallModelProbe->addAllAvailableStatistics();
-    wallModelProbe->setFileNameToNOut();
-    wallModelProbe->setForceOutputToStress(true);
-    if(para->getIsBodyForce())
-        wallModelProbe->setEvaluatePressureGradient(true);
-    para->addProbe( wallModelProbe );
+    if(writePrecursor)
+    {
+        SPtr<PrecursorWriter> precursorWriter = std::make_shared<PrecursorWriter>("precursor", para->getOutputPath()+precursorDirectory, posXPrecursor, 0, L_y, 0, L_z, tStartPrecursor/dt, nTWritePrecursor, useDistributions? OutputVariable::Distributions: OutputVariable::Velocities, 1000);
+        para->addProbe(precursorWriter);
+    }
 
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
     auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
 
-    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory);
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory, &scalingFactory);
     sim.run();
 }
 
diff --git a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt
index a489f0ab89738a193b16fee41c212a5943f6525d..83e7861a5fb85ea800d187699f1c6c1409422f0a 100644
--- a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt
+++ b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt
@@ -7,7 +7,7 @@ Path = .
 ##################################################
 GridPath = .
 ##################################################
-Devices = 1 
+Devices = 0 
 ##################################################
 tStartOut           = 0
 tOut                = 100000
@@ -28,3 +28,15 @@ SGSconstant = 0.2
 QuadricLimiterP = 100000.0
 QuadricLimiterM = 100000.0
 QuadricLimiterD = 100000.0
+
+##################################################
+readPrecursor = false
+nTimestepsReadPrecursor = 10
+# directory of the precursor files; the app queries the key "precursorDirectory" when readPrecursor or writePrecursor is true
+precursorDirectory = precursor
+##################################################
+writePrecursor = false
+nTimestepsWritePrecursor = 10
+
+tStartPrecursor = 100
+posXPrecursor = 3000
\ No newline at end of file
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index 69ecb3d8cbd45a8a7419437e934a57bd20b0bc9f..5e1cab7f48f7fb672c85f0decee4bcc2d4ac158f 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -85,7 +85,7 @@ int main()
         const real L = 1.0;
         const real Re = 1000.0;
         const real velocity = 1.0;
-        const real dt = (real)0.5e-3;
+        const real velocityLB = 0.05; // LB units
         const uint nx = 64;
 
         const uint timeStepOut = 1000;
@@ -109,10 +109,20 @@ int main()
         auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
         //////////////////////////////////////////////////////////////////////////
-        // create grid
+        // compute parameters in lattice units
         //////////////////////////////////////////////////////////////////////////
 
-        real dx = L / real(nx);
+        const real dx = L / real(nx);
+        const real dt  = velocityLB / velocity * dx;
+
+        const real vxLB = velocityLB / sqrt(2.0); // LB units
+        const real vyLB = velocityLB / sqrt(2.0); // LB units
+
+        const real viscosityLB = nx * velocityLB / Re; // LB units
+
+        //////////////////////////////////////////////////////////////////////////
+        // create grid
+        //////////////////////////////////////////////////////////////////////////
 
         gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx);
 
@@ -124,17 +134,6 @@ int main()
 
         gridBuilder->buildGrids(LbmOrGks::LBM, false);
 
-        //////////////////////////////////////////////////////////////////////////
-        // compute parameters in lattice units
-        //////////////////////////////////////////////////////////////////////////
-
-        const real velocityLB = velocity * dt / dx; // LB units
-
-        const real vxLB = velocityLB / sqrt(2.0); // LB units
-        const real vyLB = velocityLB / sqrt(2.0); // LB units
-
-        const real viscosityLB = nx * velocityLB / Re; // LB units
-
         //////////////////////////////////////////////////////////////////////////
         // set parameters
         //////////////////////////////////////////////////////////////////////////
@@ -154,7 +153,7 @@ int main()
         para->setTimestepOut(timeStepOut);
         para->setTimestepEnd(timeStepEnd);
 
-        para->setMainKernel("CumulantK17CompChimRedesigned");
+        para->setMainKernel("CumulantK17");
 
         //////////////////////////////////////////////////////////////////////////
         // set boundary conditions
@@ -164,8 +163,8 @@ int main()
         gridBuilder->setNoSlipBoundaryCondition(SideType::MX);
         gridBuilder->setNoSlipBoundaryCondition(SideType::PY);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MY);
-        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MZ);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
 
         BoundaryConditionFactory bcFactory;
 
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
index 8ca6939924fcfba22c8b96f000b9d8d05a3f7f43..ed6b4da7a3218e4d89ac90b053d9c054e4dd8205 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
@@ -50,6 +50,7 @@
 #include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
 #include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -57,19 +58,6 @@
 
 #include "utilities/communication.h"
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//          U s e r    s e t t i n g s
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-const std::string outPath("output/DrivenCavity_Results/");
-const std::string gridPath = "output/DrivenCavity_Results/grid/";
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -92,15 +80,12 @@ void multipleLevel(std::filesystem::path& configPath)
     config.load(configPath.string());
     SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-
+    GridScalingFactory scalingFactory = GridScalingFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator = true;
     bool useLevels        = true;
-    // para->setUseStreams(useStreams);                  // set in config
-    // para->useReducedCommunicationAfterFtoC = true;    // set in config
-    para->setCalcTurbulenceIntensity(false);
 
     if (para->getNumprocs() == 1) {
         para->useReducedCommunicationAfterFtoC = false;
@@ -108,47 +93,40 @@ void multipleLevel(std::filesystem::path& configPath)
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    const real L        = 1.0;
-    const real Re       = 1000.0; // 1000
+    const std::string outPath("output/");
+    const std::string gridPath = "output/";
+    std::string simulationName("DrivenCavityMultiGPU");
+
+    const real L = 1.0;
+    const real Re = 1000.0;
     const real velocity = 1.0;
-    const real dt       = (real)1.0e-3; // 0.5e-3;
-    const uint nx       = 64;
-    std::string simulationName("DrivenCavityChimMultiGPU");
+    const real velocityLB = 0.05; // LB units
+    const uint nx = 64;
 
     // para->setTimestepOut(10000);   // set in config
     // para->setTimestepEnd(10000);   // set in config
 
     const real dxGrid      = L / real(nx);
-    const real velocityLB  = velocity * dt / dxGrid;       // LB units
+    const real dt  = velocityLB / velocity * dxGrid;
     const real vxLB        = velocityLB / (real)sqrt(2.0); // LB units
     const real vyLB        = velocityLB / (real)sqrt(2.0); // LB units
     const real viscosityLB = nx * velocityLB / Re;         // LB units
 
-    para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
-        rho = (real)1.0;
-        vx  = (real)(coordX * velocityLB);
-        vy  = (real)(coordY * velocityLB);
-        vz  = (real)(coordZ * velocityLB);
-    });
-
     para->setVelocityLB(velocityLB);
     para->setViscosityLB(viscosityLB);
     para->setVelocityRatio(velocity / velocityLB);
-    para->setDensityRatio((real)1.0); // correct value?
+    para->setDensityRatio((real)1.0);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    para->setCalcDragLift(false);
-    para->setUseWale(false);
-
     if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);}
     para->setOutputPrefix(simulationName);
 
     para->setPrintFiles(true);
     std::cout << "Write result files to " << para->getFName() << std::endl;
 
-    // para->setMainKernel("CumulantK17CompChim");
-    para->setMainKernel("CumulantK17CompChimStream");
+    para->setMainKernel("CumulantK17");
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -157,7 +135,7 @@ void multipleLevel(std::filesystem::path& configPath)
     VF_LOG_INFO("velocity LB [dx/dt]              = {}", vxLB);
     VF_LOG_INFO("viscosity LB [dx/dt]             = {}", viscosityLB);
     VF_LOG_INFO("dxGrid [-]                       = {}\n", dxGrid);
-
+    VF_LOG_INFO("dt [s]                           = {}", dt);
     VF_LOG_INFO("simulation parameters:");
     VF_LOG_INFO("mainKernel                       = {}\n", para->getMainKernel());
 
@@ -226,7 +204,7 @@ void multipleLevel(std::filesystem::path& configPath)
                 if (generatePart == 0)
                     gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
                 if (generatePart == 1)
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
@@ -303,13 +281,13 @@ void multipleLevel(std::filesystem::path& configPath)
                 }
                 if (generatePart == 2) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
                 if (generatePart == 3) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 1) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
@@ -472,22 +450,22 @@ void multipleLevel(std::filesystem::path& configPath)
                 if (generatePart == 4) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 5) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 6) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 7) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 //////////////////////////////////////////////////////////////////////////
             }
@@ -513,7 +491,7 @@ void multipleLevel(std::filesystem::path& configPath)
             gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
 
             //////////////////////////////////////////////////////////////////////////
             gridBuilder->writeGridsToVtk(outPath + "/grid/");
@@ -534,7 +512,7 @@ void multipleLevel(std::filesystem::path& configPath)
         gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
     }
 
-    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory);
     sim.run();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt
index c710922b9fc82ac7680f5f7daade4faa235bc957..c5789cdf96049b7c0a31ce693c29cd2db4952a58 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt
@@ -4,35 +4,11 @@
 Devices="0 1 2 3"
 NumberOfDevices=4
 
-##################################################
-#informations for Writing
-##################################################
-Path=/work/y0078217/Results/DrivenCavityMultiGPUResults/4GPU/
-#Prefix="DrivenCavityMultiGPU" 
-#WriteGrid=true
-##################################################
-#informations for reading
-##################################################
-GridPath=/work/y0078217/Grids/GridDrivenCavityMultiGPU/4GPU/
-#GridPath="C:"
-
-##################################################
-#number of grid levels
-##################################################
-#NOGL=1
-
-##################################################
-#LBM Version
-##################################################
-#D3Qxx=27
-#MainKernelName=CumulantK17CompChim
-
 ##################################################
 #simulation parameter
 ##################################################
-TimeEnd=1
-TimeOut=1
-#TimeStartOut=0
+TimeEnd=10000
+TimeOut=10000
 
 ##################################################
 # CUDA Streams and optimized communication (only used for multiple GPUs)
diff --git a/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt b/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..40b4f08d7500c56efae7378df6398d065e4ecbfb
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(DrivenCavityUniform LANGUAGES CUDA CXX)
+# Disabled: would append -DOMPI_SKIP_MPICXX to CS_COMPILER_FLAGS_CXX.
+#LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" )
+# Build DrivenCavity.cpp with build type "binary", privately linked against the listed libraries (vf_add_library is presumably a project macro).
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES DrivenCavity.cpp)
+# Have CMake treat DrivenCavity.cpp as a CUDA source despite its .cpp extension.
+set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA)
+# Enable CUDA separable compilation for the DrivenCavityUniform target.
+set_target_properties(DrivenCavityUniform PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
diff --git a/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..958ef4714118aac34b8cfb0bec3aab97b108b01d
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
@@ -0,0 +1,231 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DrivenCavity.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr, Stephan Lenz
+//=======================================================================================
+#define _USE_MATH_DEFINES
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "Core/DataTypes.h"
+#include "Core/LbmOrGks.h"
+#include "Core/Logger/Logger.h"
+#include "Core/VectorTypes.h"
+#include "PointerDefinitions.h"
+
+#include <logger/Logger.h>
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/GridFactory.h"
+#include "GridGenerator/geometries/Cuboid/Cuboid.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+//! Uniform-grid lid-driven cavity: builds the grid, sets parameters and BCs, then runs the simulation.
+int main()
+{
+    try {
+        vf::logging::Logger::initalizeLogger();
+        //////////////////////////////////////////////////////////////////////////
+        // Simulation parameters
+        //////////////////////////////////////////////////////////////////////////
+        std::string path("./output/DrivenCavity_uniform");
+        std::string simulationName("LidDrivenCavity");
+
+        const real L = 1.0;           // edge length of the cubic cavity (logged as [m])
+        const real Re = 1000.0;       // Reynolds number
+        const real velocity = 1.0;    // lid velocity magnitude (logged as [m/s])
+        const real dt = (real)0.5e-3; // time step (logged as [s])
+        const uint nx = 64;           // number of cells per direction
+
+        const uint timeStepOut = 1000;  // passed to Parameter::setTimestepOut()
+        const uint timeStepEnd = 10000; // passed to Parameter::setTimestepEnd()
+
+        //////////////////////////////////////////////////////////////////////////
+        // setup logger
+        //////////////////////////////////////////////////////////////////////////
+
+        logging::Logger::addStream(&std::cout);
+        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+        logging::Logger::timeStamp(logging::Logger::ENABLE);
+        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+        //////////////////////////////////////////////////////////////////////////
+        // setup gridGenerator
+        //////////////////////////////////////////////////////////////////////////
+
+        auto gridFactory = GridFactory::make();
+        gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+        auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+        //////////////////////////////////////////////////////////////////////////
+        // create grid
+        //////////////////////////////////////////////////////////////////////////
+
+        real dx = L / real(nx);
+
+        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx);
+
+        // gridBuilder->addGrid(new Cuboid(-0.25, -0.25, -0.25, 0.25, 0.25, 0.25), 1); // add fine grid
+        GridScalingFactory scalingFactory = GridScalingFactory();
+        scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+
+        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+        gridBuilder->buildGrids(LbmOrGks::LBM, false);
+
+        //////////////////////////////////////////////////////////////////////////
+        // compute parameters in lattice units
+        //////////////////////////////////////////////////////////////////////////
+
+        const real velocityLB = velocity * dt / dx; // LB units
+
+        // the lid moves diagonally in the x-y plane; both components together have magnitude velocityLB
+        const real vxLB = velocityLB / sqrt(2.0); // LB units
+        const real vyLB = velocityLB / sqrt(2.0); // LB units
+
+        const real viscosityLB = nx * velocityLB / Re; // LB units, from Re = velocityLB * nx / viscosityLB
+
+        //////////////////////////////////////////////////////////////////////////
+        // set parameters
+        //////////////////////////////////////////////////////////////////////////
+        SPtr<Parameter> para = std::make_shared<Parameter>();
+
+        para->setOutputPath(path);
+        para->setOutputPrefix(simulationName);
+
+        para->setPrintFiles(true);
+
+        para->setVelocityLB(velocityLB);
+        para->setViscosityLB(viscosityLB);
+
+        para->setVelocityRatio(velocity / velocityLB);
+        para->setDensityRatio(1.0);
+
+        para->setTimestepOut(timeStepOut);
+        para->setTimestepEnd(timeStepEnd);
+
+        para->setMainKernel("CumulantK17");
+
+        //////////////////////////////////////////////////////////////////////////
+        // set boundary conditions
+        //////////////////////////////////////////////////////////////////////////
+
+        gridBuilder->setNoSlipBoundaryCondition(SideType::PX);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::MX);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::PY);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::MY);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::MZ);
+
+        BoundaryConditionFactory bcFactory;
+
+        bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack);
+        bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible);
+
+        //////////////////////////////////////////////////////////////////////////
+        // copy mesh to simulation
+        //////////////////////////////////////////////////////////////////////////
+
+        vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
+
+        auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+        SPtr<GridProvider> gridGenerator =
+            GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+
+        //////////////////////////////////////////////////////////////////////////
+        // run simulation
+        //////////////////////////////////////////////////////////////////////////
+
+        VF_LOG_INFO("Start Running DrivenCavity Showcase...");
+        printf("\n");
+        VF_LOG_INFO("world parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("dt [s]                 = {}", dt);
+        VF_LOG_INFO("world_length   [m]     = {}", L);
+        VF_LOG_INFO("world_velocity [m/s]   = {}", velocity);
+        VF_LOG_INFO("dx [m]                 = {}", dx);
+        printf("\n");
+        VF_LOG_INFO("LB parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("Re                     = {}", Re);
+        VF_LOG_INFO("lb_velocity [dx/dt]    = {}", velocityLB);
+        VF_LOG_INFO("lb_viscosity [dx^2/dt] = {}", viscosityLB);
+        VF_LOG_INFO("lb_vx [dx/dt] (lb_velocity/sqrt(2)) = {}", vxLB);
+        VF_LOG_INFO("lb_vy [dx/dt] (lb_velocity/sqrt(2)) = {}", vyLB);
+        printf("\n");
+        VF_LOG_INFO("simulation parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("nx                     = {}", nx);
+        VF_LOG_INFO("ny                     = {}", nx);
+        VF_LOG_INFO("nz                     = {}", nx);
+        VF_LOG_INFO("number of nodes        = {}", nx * nx * nx);
+        VF_LOG_INFO("n timesteps            = {}", timeStepEnd); // total number of time steps
+        VF_LOG_INFO("write_nth_timestep     = {}", timeStepOut); // output interval
+        VF_LOG_INFO("output_path            = {}", path);
+
+        Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory);
+        sim.run();
+
+    } catch (const spdlog::spdlog_ex &ex) {
+        std::cout << "Log initialization failed: " << ex.what() << std::endl;
+    } catch (const std::bad_alloc &e) {
+        VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
+    } catch (const std::exception &e) {
+        VF_LOG_CRITICAL("exception: {}", e.what());
+    } catch (...) {
+        VF_LOG_CRITICAL("Unknown exception!");
+    }
+
+    return 0; // exceptions are only logged above; the exit code is 0 in every case
+}
diff --git a/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt b/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt
new file mode 100644
index 0000000000000000000000000000000000000000..458346a67c7f001580494af1dc9262034613be68
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt
@@ -0,0 +1,34 @@
+##################################################
+#GPU Mapping
+##################################################
+#Devices="0 1 2 3"
+#NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+#Path = "output/"
+#Prefix="DrivenCavity" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+#GridPath="grid/"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantAA2016CompSP27
+
+##################################################
+#simulation parameter
+##################################################
+#TimeEnd=100000
+#TimeOut=1000 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
index efac863fc9efd446e5f266648ad4fa74c954634f..dc5eaf58aff9b4a1b87d70c187b81461330ee3da 100644
--- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
+++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
@@ -40,7 +40,6 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
@@ -49,7 +48,7 @@
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 
 //////////////////////////////////////////////////////////////////////////
 
diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
index 4e2b0c91482b6a650ff28a210673cac097cb8c2d..2bf6955062da5c98f6a7b931c19821c52eaf15ea 100644
--- a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
+++ b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
@@ -7,14 +7,14 @@ NumberOfDevices=4
 ##################################################
 #informations for Writing
 ##################################################
-Path=/work/y0078217/Results/MusselOysterResults/8GPUOyster05/
+#Path=/work/y0078217/Results/MusselOysterResults/8GPUOyster05/
 #Path="F:/Work/Computations/out/MusselOyster/"
 #Prefix="MusselOyster" 
 #WriteGrid=true
 ##################################################
 #informations for reading
 ##################################################
-GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/
+#GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/
 #GridPath="C:"
 
 ##################################################
@@ -31,8 +31,8 @@ GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/
 ##################################################
 #simulation parameter
 ##################################################
-TimeEnd=400000 # 800000
-TimeOut=100000 # 400000
+TimeEnd=100000 # 800000
+TimeOut=10000 # 400000
 #TimeStartOut=0
 
 ##################################################
diff --git a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
index d8642c7b267bcad6c58ab2a9c178c2d9394ecf2a..7514c2b273bf60d6e2523f132911dde8839d296a 100644
--- a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
+++ b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
@@ -1,63 +1,95 @@
-//#define MPI_LOGGING
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TGV_3D.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr
+//=======================================================================================
+#define _USE_MATH_DEFINES
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <math.h>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
 
-//Martin Branch
+#include "mpi.h"
 
-#include <mpi.h>
-#if defined( MPI_LOGGING )
-	#include <mpe.h>
-#endif
+//////////////////////////////////////////////////////////////////////////
 
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
-#define _USE_MATH_DEFINES
-#include <math.h>
+#include "Core/DataTypes.h"
+#include "Core/LbmOrGks.h"
+#include "Core/Logger/Logger.h"
+#include "Core/VectorTypes.h"
+#include "PointerDefinitions.h"
 
-//#include "metis.h"
+//////////////////////////////////////////////////////////////////////////
 
-#include "basics/Core/LbmOrGks.h"
-#include "basics/Core/StringUtilities/StringUtil.h"
-#include <basics/config/ConfigurationFile.h>
+#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
+#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
+#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Output/FileWriter.h"
-
-#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
-#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
-
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
 
-#include "global.h"
-
-#include "geometries/Sphere/Sphere.h"
-#include "geometries/VerticalCylinder/VerticalCylinder.h"
-#include "geometries/Cuboid/Cuboid.h"
-#include "geometries/TriangularMesh/TriangularMesh.h"
-#include "geometries/Conglomerate/Conglomerate.h"
-#include "geometries/TriangularMesh/TriangularMeshStrategy.h"
-
-#include "grid/GridBuilder/LevelGridBuilder.h"
-#include "grid/GridBuilder/MultipleGridBuilder.h"
-#include "grid/BoundaryConditions/Side.h"
-#include "grid/BoundaryConditions/BoundaryCondition.h"
-#include "grid/GridFactory.h"
+#include <logger/Logger.h>
 
-#include "io/SimulationFileWriter/SimulationFileWriter.h"
-#include "io/GridVTKWriter/GridVTKWriter.h"
-#include "io/STLReaderWriter/STLReader.h"
-#include "io/STLReaderWriter/STLWriter.h"
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "utilities/math/Math.h"
-#include "utilities/communication.h"
-#include "utilities/transformator/TransformatorImp.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // from https://stackoverflow.com/questions/865668/how-to-parse-command-line-arguments-in-c
@@ -94,8 +126,8 @@ bool useWale = false;
 
 std::string kernel( "CumulantK17Comp" );
 
-std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS
-//std::string path("E:/DrivenCavity/results/"); //TESLA03
+//std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS
+std::string path("D:/out/TGV_3D/"); //TESLA03
 
 std::string simulationName("TGV_3D");
 //////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt b/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..efb4310669f9c0de7aa5cf3f1e4dffa00bd66cbf
--- /dev/null
+++ b/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt
@@ -0,0 +1,7 @@
+project(TGV_3D_GridRef LANGUAGES CUDA CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES TGV_3D_GridRef.cpp)
+# compile the .cpp source with the CUDA compiler
+set_property(SOURCE TGV_3D_GridRef.cpp PROPERTY LANGUAGE CUDA)
+# build the target's device code with separable compilation
+set_property(TARGET TGV_3D_GridRef PROPERTY CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a88fee2e583a7cb227702ff19ada7daced1b1708
--- /dev/null
+++ b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
@@ -0,0 +1,399 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TGV_3D_GridRef.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr
+//=======================================================================================
+#define _USE_MATH_DEFINES
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <math.h>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+#include "mpi.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "Core/DataTypes.h"
+#include "Core/LbmOrGks.h"
+#include "Core/Logger/Logger.h"
+#include "Core/VectorTypes.h"
+#include "PointerDefinitions.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
+#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
+#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+
+#include <logger/Logger.h>
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// from https://stackoverflow.com/questions/865668/how-to-parse-command-line-arguments-in-c
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+//! Returns the argument that directly follows \p option in [begin, end), or a null pointer if there is none.
+char* getCmdOption(char ** begin, char ** end, const std::string & option)
+{
+    char ** const flag = std::find(begin, end, option);
+    if (flag == end) return nullptr; // flag not given on the command line
+
+    char ** const value = flag + 1;
+    return (value != end) ? *value : nullptr; // a flag in last position carries no value
+}
+
+bool cmdOptionExists(char** begin, char** end, const std::string& option)
+{   // true if any argument in [begin, end) is equal to the given option flag
+    return std::any_of(begin, end, [&option](const char* arg) { return option == arg; });
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////
+real Re =  1600.0; // Reynolds number; divides the viscosity computed in multipleLevel()
+
+uint dtPerL = 500; // divisor of the velocity in multipleLevel(); presumably time steps per length L - TODO confirm
+
+uint nx = 64; // number of coarse-grid cells per direction (dx = 2*PI / nx in multipleLevel())
+uint gpuIndex = 0; // presumably the index of the GPU to use; usage not visible here - TODO confirm
+
+bool useLimiter = false; // when true, "_Limiter" is appended to the output path
+bool useWale = false; // NOTE(review): presumably toggles the WALE turbulence model - confirm against its usage
+
+std::string kernel( "CumulantK17CompChimRedesigned" ); // appended to the output path in multipleLevel()
+
+std::string path("D:/out/TGV_3D/"); //MOLLOK
+
+std::string simulationName("TGV_3D_Gridref_noSqPress");
+//////////////////////////////////////////////////////////////////////////
+
+void multipleLevel(const std::string& configPath)
+{
+    logging::Logger::addStream(&std::cout);
+    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+    logging::Logger::timeStamp(logging::Logger::ENABLE);
+    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
+
+    auto gridFactory = GridFactory::make();
+    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+    vf::basics::ConfigurationFile config;
+    config.load(configPath);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
+    GridScalingFactory scalingFactory = GridScalingFactory();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+	const real PI = 3.141592653589793238462643383279;
+
+    real L = nx / ( 2.0 * PI );
+
+    const real velocity = 64.0 / ( dtPerL * 2.0 * PI );
+
+    const real viscosity = nx / ( 2.0 * PI ) * velocity / Re;
+
+    *logging::out << logging::Logger::INFO_HIGH << "velocity = " << velocity << " s\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity = " << viscosity << "\n";
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+	real dx = 2.0 * PI / real(nx);
+
+	gridBuilder->addCoarseGrid(-PI, -PI, -PI,
+								PI,  PI,  PI, dx);
+
+    gridBuilder->setNumberOfLayers(0, 0);
+
+    auto fineGrid = new Cuboid(-PI * 0.5, -PI * 0.5, -PI * 0.5, 
+                                     0.0,  PI * 0.5,       0.0);
+
+    gridBuilder->addGrid(fineGrid, 1);
+
+	gridBuilder->setPeriodicBoundaryCondition(true, true, true);
+
+	gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+
+	//std::stringstream _path;
+ //   std::stringstream _prefix;
+
+ //   //_path << "F:/Work/Computations/TaylorGreenVortex_3D/TGV_LBM/" << nx << "_Re_1.6e4";
+ //   //_path << "F:/Work/Computations/TaylorGreenVortex_3D/TGV_LBM/" << nx << "_neqInit";
+ //   _path << "F:/Work/Computations/TaylorGreenVortex_3D/TGV_LBM/Re_1600/AA2016/" << nx << "_FD_O8";
+
+ //   //_path << "./results/AA2016/" << nx;
+ //   //_path << "./results/CumOne/" << nx;
+ //   //_path << "./results/F3_2018/" << nx;
+
+ //   _prefix << "TGV_3D_" << nx << "_" ;
+
+ //   para->setOutputPath(_path.str());
+ //   para->setOutputPrefix(_prefix.str());
+ //   para->setPathAndFilename(_path.str() + "/" + _prefix.str());
+
+    //////////////////////////////////////////////////////////////////////////
+
+    {
+        std::stringstream _path;
+
+        _path << path;
+        _path << kernel;
+        _path << "SingleGPU";
+
+        if (useLimiter) _path << "_Limiter";
+
+        path = _path.str();
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+
+    {
+        std::stringstream _simulationName;
+
+        _simulationName << simulationName;
+        _simulationName << "_nx_" << nx;
+        _simulationName << "_dtPerL_" << dtPerL << "_";
+
+        simulationName = _simulationName.str();
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+
+    para->setDevices(std::vector<uint>{gpuIndex});
+
+    //////////////////////////////////////////////////////////////////////////
+
+    para->setOutputPath( path );
+    para->setOutputPrefix( simulationName );
+
+    para->setPrintFiles(true);
+
+    para->setTimestepEnd(40 * lround(L / velocity));
+    para->setTimestepOut(5 * lround(L / velocity));
+    //para->setTimestepOut(lround(L / velocity));
+ //   para->setTimestepEnd(2048);
+	//para->setTimestepOut(512);
+ //   para->setTimestepStartOut(500);
+
+    para->setVelocityLB( velocity );
+
+    para->setViscosityLB( viscosity );
+
+    para->setVelocityRatio( 1.0 / velocity );
+
+    para->setDensityRatio(1.0);
+
+    para->setInitialCondition( [&]( real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz){
+
+        real a = 1.0;
+        real b = 1.0;
+        real c = 1.0;
+
+        rho = 3.0 * ((velocity * velocity) / 16.0 * ( cos( 2.0 * a * coordX ) + cos( 2.0 * b * coordY ) ) * ( cos( 2.0 * c * coordZ ) + 2.0 ) );
+        vx  =  velocity * sin( a * coordX ) * cos( b * coordY ) * cos( c * coordZ );
+        vy  = -velocity * cos( a * coordX ) * sin( b * coordY ) * cos( c * coordZ );
+        vz  = 0.0;
+
+    } );
+
+    para->setMainKernel( kernel );
+
+    if( !useLimiter )
+        para->setQuadricLimiters( 1000000.0, 1000000.0, 1000000.0 );
+
+    if( useWale )
+        para->setUseWale( true );
+
+    para->setUseInitNeq( true );
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+    SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+    //SPtr<GridProvider> gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
+
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory);
+    sim.run();
+
+    //sim.addKineticEnergyAnalyzer( 10 );
+    //sim.addEnstrophyAnalyzer( 10 );
+
+    //sim.run();
+}
+
+
+int main( int argc, char* argv[]) // Entry point: init MPI, apply command-line overrides, run multipleLevel(), finalize MPI.
+{
+    MPI_Init(&argc, &argv);
+    std::string str, str2; // str is only read by the MPI_LOGGING block below; str2 is not used in this function
+    if ( argv != NULL )
+    {
+        //str = static_cast<std::string>(argv[0]);
+
+        try
+        {
+            //////////////////////////////////////////////////////////////////////////
+			std::string targetPath( __FILE__ ); // path of this source file as seen by the compiler
+            // Keep only the directory part: everything up to and including the last path separator.
+#ifdef _WIN32
+			targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
+#else
+			targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
+#endif
+
+            //////////////////////////////////////////////////////////////////////////
+            // Command-line options overwrite variables declared outside this function (Re, nx, dtPerL, kernel, gpuIndex, useLimiter, useWale).
+            if( cmdOptionExists( argv, argv+argc, "--Re" ) )
+                Re = atof( getCmdOption( argv, argv+argc, "--Re" ) );
+
+            if( cmdOptionExists( argv, argv+argc, "--nx" ) )
+                nx = atoi( getCmdOption( argv, argv+argc, "--nx" ) );
+
+            if( cmdOptionExists( argv, argv+argc, "--dtPerL" ) )
+                dtPerL = atoi( getCmdOption( argv, argv+argc, "--dtPerL" ) ); // parsed with atoi, i.e. as an integer
+
+            if( cmdOptionExists( argv, argv+argc, "--kernel" ) )
+                kernel = getCmdOption( argv, argv+argc, "--kernel" );
+
+            if( cmdOptionExists( argv, argv+argc, "--gpu" ) )
+                gpuIndex = atoi( getCmdOption( argv, argv+argc, "--gpu" ) );
+
+            if( cmdOptionExists( argv, argv+argc, "--useLimiter" ) ) // flag only: presence of the option enables it
+                useLimiter = true;
+
+            if( cmdOptionExists( argv, argv+argc, "--useWale" ) ) // flag only: presence of the option enables it
+                useWale = true;
+
+			multipleLevel(targetPath + "config.txt"); // config.txt is looked up in the directory derived from __FILE__ above
+
+            //////////////////////////////////////////////////////////////////////////
+		}
+        catch (const std::bad_alloc& e)
+        {
+
+            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+            //std::cout << e.what() << std::flush;
+            //MPI_Abort(MPI_COMM_WORLD, -1);
+        }
+        catch (const std::exception& e)
+        {
+
+            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+            //std::cout << e.what() << std::flush;
+            //MPI_Abort(MPI_COMM_WORLD, -1);
+        }
+        catch (...)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+            //std::cout << "unknown exeption" << std::endl;
+        }
+        // NOTE(review): every exception is only logged; execution continues to MPI_Finalize() and main still returns 0.
+        //std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
+        //MPI_Abort(MPI_COMM_WORLD, -1);
+    }
+
+
+   /*
+   MPE_Init_log() & MPE_Finish_log() are NOT needed when
+   liblmpe.a is linked with this program.  In that case,
+   MPI_Init() would have called MPE_Init_log() already.
+   */
+#if defined( MPI_LOGGING )
+   MPE_Init_log();
+#endif
+   // NOTE(review): with MPI_LOGGING defined, MPE_Finish_log() is called twice when argv != NULL (argv[0], then the if/else below).
+#if defined( MPI_LOGGING )
+   if ( argv != NULL )
+      MPE_Finish_log( argv[0] );
+   if ( str != "" ) // str is never assigned in this function (the assignment above is commented out)
+      MPE_Finish_log( str.c_str() );
+   else
+      MPE_Finish_log( "TestLog" );
+#endif
+
+   MPI_Finalize();
+   return 0; // returned unconditionally, also after a caught exception
+}
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/config.txt b/apps/gpu/LBM/TGV_3D_GridRef/config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae6d3e9bc4be5403d151f3d59ffb13af7164abf0
--- /dev/null
+++ b/apps/gpu/LBM/TGV_3D_GridRef/config.txt
@@ -0,0 +1,36 @@
+##################################################
+#GPU Mapping
+##################################################
+#Devices="0 1 2 3"
+#NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+#Path="E:/DrivenCavity/results"
+#Path="F:/Work/Computations/out/DrivenCavity/"
+#Prefix="DrivenCavity" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+#GridPath="E:/DrivenCavity/dummy"
+GridPath="F:/Work/Computations/out/TaylorGreen3DNew/grid"
+
+##################################################
+#number of grid levels
+##################################################
+NOGL=2
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantAA2016CompSP27
+
+##################################################
+#simulation parameter
+##################################################
+#TimeEnd=100000
+#TimeOut=1000 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
index 8c303dc07c911c363e892ce53f7bfe7f48e284d6..045c208274bc6bc216d25e8c2fa905916a52f87b 100644
--- a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
+++ b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
@@ -1,7 +1,38 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TGV_3D_MultiGPU.cpp
+//! \ingroup TGV_3D_MultiGPU
+//! \author Martin Schoenherr
+//=======================================================================================
 //#define MPI_LOGGING
 
 //Martin Branch
-
 #include <mpi.h>
 #if defined( MPI_LOGGING )
 	#include <mpe.h>
@@ -97,7 +128,7 @@ bool useWale = false;
 int mpirank;
 int mpiWorldSize;
 
-std::string kernel( "CumulantK20Comp" );
+std::string kernel( "CumulantK17CompChim" );
 
 //std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS
 //std::string path("results/"); //PHOENIX
diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
index 06b3678d7c8ddd236c26a69686356fbe87c31db2..3e083afd690632dbaabdde5d00f2ab454d86032b 100644
--- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
+++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WTG_RUB.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr
+//=======================================================================================
 #define _USE_MATH_DEFINES
 #include <math.h>
 #include <string>
@@ -15,15 +47,10 @@
 
 #include "Core/DataTypes.h"
 #include "PointerDefinitions.h"
-
 #include "Core/LbmOrGks.h"
-#include "Core/StringUtilities/StringUtil.h"
-
 #include "Core/VectorTypes.h"
 #include "Core/Logger/Logger.h"
 
-#include <basics/config/ConfigurationFile.h>
-
 //////////////////////////////////////////////////////////////////////////
 
 #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
diff --git a/metadata.xml b/metadata.xml
deleted file mode 100644
index 7cbae3ae7e1d5d7d48af2f0e5577253a89f953f5..0000000000000000000000000000000000000000
--- a/metadata.xml
+++ /dev/null
@@ -1,204 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd">
-	<identifier identifierType="DOI">PLACEHOLDER</identifier>
-	<titles>
-		<title xml:lang="en">VirtualFluids</title>
-	</titles>
-	<language>en</language>
-	<creators>
-		<creator>
-			<creatorName nameType="Personal">Krafczyk, Manfred</creatorName>
-			<givenName>Manfred</givenName>
-			<familyName>Krafczyk</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID">0000-0002-8509-0871</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="de">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</affiliation>
-		</creator>
-		<creator>
-			<creatorName nameType="Organizational">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</creatorName>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-		</creator>
-	</creators>
-	<publisher xml:lang="de">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</publisher>
-	<publicationYear>2021</publicationYear>
-	<resourceType resourceTypeGeneral="Software">Computational Fluid Dynamics Solver</resourceType>
-	<subjects>
-		<subject subjectScheme="DDC" schemeURI="https://www.oclc.org/en/dewey.html">532 Fluid Mechanics, liquid mechanics</subject>
-	</subjects>
-	<contributors>
-		<contributor contributorType="Researcher">
-			<contributorName>Ahrenholz, Benjamin</contributorName>
-			<givenName>Benjamin</givenName>
-			<familyName>Ahrenholz</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Alihussein, Hussein</contributorName>
-			<givenName>Hussein</givenName>
-			<familyName>Alihussein</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3656-7028</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Bindick, Sebastian</contributorName>
-			<givenName>Sebastian</givenName>
-			<familyName>Bindick</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Brendel, Aileen</contributorName>
-			<givenName>Aileen</givenName>
-			<familyName>Brendel</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Geier, Martin</contributorName>
-			<givenName>Martin</givenName>
-			<familyName>Geier</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-8367-9412</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Geller, Sebastian</contributorName>
-			<givenName>Sebastian</givenName>
-			<familyName>Geller</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Goraki Fard, Ehsan</contributorName>
-			<givenName>Ehsan</givenName>
-			<familyName>Goraki Fard</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Hegewald, Jan</contributorName>
-			<givenName>Jan</givenName>
-			<familyName>Hegewald</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>JanÃŸen, Christian</contributorName>
-			<givenName>Christian</givenName>
-			<familyName>JanÃŸen</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Kutscher, Konstantin</contributorName>
-			<givenName>Konstantin</givenName>
-			<familyName>Kutscher</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-1099-1608</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Lenz, Stephan</contributorName>
-			<givenName>Stephan</givenName>
-			<familyName>Lenz</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Linxweiler, Jan</contributorName>
-			<givenName>Jan</givenName>
-			<familyName>Linxweiler</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-2755-5087</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Lux, Lennard</contributorName>
-			<givenName>Lennard</givenName>
-			<familyName>Lux</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Marcus, Sven</contributorName>
-			<givenName>Sven</givenName>
-			<familyName>Marcus</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3689-2162</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">UniversitÃ¤tsbibliothek Braunschweig</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Peters, SÃ¶ren</contributorName>
-			<givenName>SÃ¶ren</givenName>
-			<familyName>Peters</familyName>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Safari, Hesameddin</contributorName>
-			<givenName>Hesameddin</givenName>
-			<familyName>Safari</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>SchÃ¶nherr, Martin</contributorName>
-			<givenName>Martin</givenName>
-			<familyName>SchÃ¶nherr</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-4774-1776</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut fÃ¼r rechnergestÃ¼tzte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Stiebler, Maik</contributorName>
-			<givenName>Maik</givenName>
-			<familyName>Stiebler</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Textor, SÃ¶ren</contributorName>
-			<givenName>SÃ¶ren</givenName>
-			<familyName>Textor</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>TÃ¶lke, Jonas</contributorName>
-			<givenName>Jonas</givenName>
-			<familyName>TÃ¶lke</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Uphoff, Sonja</contributorName>
-			<givenName>Sonja</givenName>
-			<familyName>Uphoff</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Wellmann, Anna</contributorName>
-			<givenName>Anna</givenName>
-			<familyName>Wellmann</familyName>
-		</contributor>
-	</contributors>
-	<dates>
-		<date dateType="Created">2000</date>
-	</dates>
-	<formats>
-		<format>text/x-c</format>
-		<format>text/x-h</format>
-		<format>text/x-script.python</format>
-	</formats>
-	<relatedIdentifiers>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="Requires" resourceTypeGeneral="Software">https://www.open-mpi.org/software/ompi/v4.1/</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://cmake.org</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://gcc.gnu.org</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://clang.llvm.org</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://visualstudio.microsoft.com/vs/features/cplusplus/</relatedIdentifier>
-	</relatedIdentifiers>
-	<rightsList>
-		<rights xml:lang="en" schemeURI="https://spdx.org/licenses/" rightsIdentifierScheme="SPDX" rightsIdentifier="GPL-3.0-only" rightsURI="https://www.gnu.org/licenses/gpl-3.0-standalone.html">GNU General Public License Version 3</rights>
-	</rightsList>
-	<descriptions>
-		<description descriptionType="Abstract">
-			VirtualFluids (VF) is a research code developed at the Institute for Computational Modeling in Civil Engineering (iRMB). The code is a Computational Fluid Dynamics (CFD) solver based on the Lattice Boltzmann Method (LBM) for turbulent, thermal, multiphase and multicomponent flow problems as well as for multi-field problems such as Fluid-Structure-interaction including distributed pre- and postprocessing capabilities for simulations with more than 100 billion degrees of freedom.
-		</description>
-	</descriptions>
-</resource>
diff --git a/pyproject.toml b/pyproject.toml
index 8fcb7926102d188b44d8c74084235b6f175edf80..257da6fd95d683081dbff865c864079eae9c675d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,2 +1,9 @@
 [build-system]
-requires = ["setuptools", "wheel", "scikit-build"]
\ No newline at end of file
+requires = [
+    "setuptools>=42",
+    "scikit-build",
+    "cmake",
+    "ninja; platform_system!='Windows'"
+]
+build-backend = "setup_builder"
+backend-path = ["utilities"]
\ No newline at end of file
diff --git a/pythonbindings/CMakeLists.txt b/pythonbindings/CMakeLists.txt
index 5a84adef027fdfa2953e016693bb64570e48c1ef..815a4b59cf6c3e4e5ac4a7a72a5bd4e374d64c96 100644
--- a/pythonbindings/CMakeLists.txt
+++ b/pythonbindings/CMakeLists.txt
@@ -1,24 +1,45 @@
-project(VirtualFluidsPython LANGUAGES CUDA CXX)
+set(PYFLUIDS_LANGUAGES CXX)
+
+if(BUILD_VF_GPU)
+    set(PYFLUIDS_LANGUAGES CUDA CXX)
+endif()
+
+project(VirtualFluidsPython LANGUAGES ${PYFLUIDS_LANGUAGES})
+
+pybind11_add_module(python_bindings MODULE src/VirtualFluids.cpp)
+
+set_target_properties(  python_bindings PROPERTIES
+                        LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pyfluids
+                        OUTPUT_NAME "bindings")
+
+target_link_libraries(python_bindings PRIVATE basics logger mpi)
+
 IF(BUILD_VF_GPU)
-    pybind11_add_module(pyfluids src/VirtualFluidsModulesGPU.cpp)
-    set_source_files_properties(src/VirtualFluidsModulesGPU.cpp PROPERTIES LANGUAGE CUDA)
+    set_source_files_properties(src/VirtualFluids.cpp PROPERTIES LANGUAGE CUDA)
 
-    target_link_libraries(pyfluids PRIVATE GridGenerator VirtualFluids_GPU basics lbmCuda logger)
-    target_include_directories(pyfluids PRIVATE ${VF_THIRD_DIR}/cuda_samples/)
+    target_include_directories(python_bindings PRIVATE ${VF_THIRD_DIR}/cuda_samples/)
+    target_compile_definitions(python_bindings PRIVATE VF_GPU_PYTHONBINDINGS)
 
+    target_link_libraries(python_bindings PRIVATE GridGenerator VirtualFluids_GPU lbm)
 ENDIF()
+
 IF(BUILD_VF_CPU)
-    pybind11_add_module(pyfluids src/VirtualFluidsModulesCPU.cpp)
-    pybind11_add_module(pymuparser src/muParser.cpp)
+    target_compile_definitions(python_bindings PRIVATE VF_METIS VF_MPI VF_CPU_PYTHONBINDINGS)
+    target_link_libraries(python_bindings PRIVATE simulationconfig VirtualFluidsCore muparser lbm)
+
+    # include bindings for muparser
+    pybind11_add_module(pymuparser MODULE src/muParser.cpp)
 
     # TODO: Move this to MuParser CMakeLists.txt
     set_target_properties(muparser PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
-    target_compile_definitions(pyfluids PRIVATE VF_METIS VF_MPI)
+    set_target_properties(  pymuparser PROPERTIES
+                            LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pymuparser
+                            OUTPUT_NAME "bindings")
     target_compile_definitions(pymuparser PRIVATE VF_METIS VF_MPI)
-
-    target_link_libraries(pyfluids PRIVATE simulationconfig VirtualFluidsCore muparser basics)
     target_link_libraries(pymuparser PRIVATE muparser)
 ENDIF()
-target_include_directories(pyfluids PRIVATE ${CMAKE_SOURCE_DIR}/src/)
-target_include_directories(pyfluids PRIVATE ${CMAKE_BINARY_DIR})
\ No newline at end of file
+
+
+target_include_directories(python_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/)
+target_include_directories(python_bindings PRIVATE ${CMAKE_BINARY_DIR})
\ No newline at end of file
diff --git a/Python/boundary_layer/__init__.py b/pythonbindings/pyfluids-stubs/__init__.pyi
similarity index 100%
rename from Python/boundary_layer/__init__.py
rename to pythonbindings/pyfluids-stubs/__init__.pyi
diff --git a/pythonbindings/pyfluids-stubs/bindings/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/__init__.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..4e7f353eab97cc536f8f18e72319af1cd7a1916a
--- /dev/null
+++ b/pythonbindings/pyfluids-stubs/bindings/__init__.pyi
@@ -0,0 +1,38 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file __init__.pyi
+! \ingroup bindings
+! \author Henry Korb
+=======================================================================================
+"""
+class ostream_redirect:  # type stub: declares __enter__/__exit__, so the class is typed for use in a `with` statement
+    def __init__(self, stdout: bool = ..., stderr: bool = ...) -> None: ...  # both flags are optional booleans; their defaults are elided in this stub
+    def __enter__(self) -> None: ...  # declared to return None
+    def __exit__(self, *args) -> None: ...  # accepts any positional arguments; declared to return None
diff --git a/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..a41b7934ca706dc0db5bd6188fee3150456e0cd9
--- /dev/null
+++ b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
@@ -0,0 +1,82 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file __init__.pyi
+! \ingroup basics
+! \author Henry Korb
+=======================================================================================
+"""
+from typing import ClassVar
+
+from typing import overload
+
+class ConfigurationFile:  # Keyed configuration accessor: load(file) returns a bool, contains(key) tests for a key, and every typed get_*_value getter has one overload without and one with a default value.
+    def __init__(self) -> None: ...
+    def contains(self, key: str) -> bool: ...
+    @overload
+    def get_bool_value(self, key: str) -> bool: ...
+    @overload
+    def get_bool_value(self, key: str, default_value: bool) -> bool: ...
+    @overload
+    def get_double_value(self, key: str) -> float: ...
+    @overload
+    def get_double_value(self, key: str, default_value: float) -> float: ...
+    @overload
+    def get_float_value(self, key: str) -> float: ...
+    @overload
+    def get_float_value(self, key: str, default_value: float) -> float: ...
+    @overload
+    def get_int_value(self, key: str) -> int: ...
+    @overload
+    def get_int_value(self, key: str, default_value: int) -> int: ...
+    @overload
+    def get_string_value(self, key: str) -> str: ...
+    @overload
+    def get_string_value(self, key: str, default_value: str) -> str: ...
+    @overload
+    def get_uint_value(self, key: str) -> int: ...
+    @overload
+    def get_uint_value(self, key: str, default_value: int) -> int: ...
+    def load(self, file: str) -> bool: ...
+
+class LbmOrGks:  # Enum-like binding class with members GKS and LBM; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    GKS: ClassVar[LbmOrGks] = ...
+    LBM: ClassVar[LbmOrGks] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi b/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..43938ff7646efd3c596ae29971cce39fed865fa6
--- /dev/null
+++ b/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi
@@ -0,0 +1,83 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file logger.pyi
+! \ingroup basics
+! \author Henry Korb
+=======================================================================================
+"""
+from typing import Any, ClassVar
+
+log: None  # NOTE(review): module attribute annotated as None - presumably a stub-generator artifact; confirm the real type exposed by the binding.
+
+class Level:  # Enum-like binding class with members INFO_HIGH, INFO_INTERMEDIATE, INFO_LOW, LOGGER_ERROR and WARNING; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    INFO_HIGH: ClassVar[Level] = ...
+    INFO_INTERMEDIATE: ClassVar[Level] = ...
+    INFO_LOW: ClassVar[Level] = ...
+    LOGGER_ERROR: ClassVar[Level] = ...
+    WARNING: ClassVar[Level] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class Logger:  # Logging configuration facade: every method declared here besides __init__ is a @staticmethod returning None.
+    def __init__(self, *args, **kwargs) -> None: ...
+    @staticmethod
+    def add_stdout() -> None: ...
+    @staticmethod
+    def enable_printed_rank_numbers(print: bool) -> None: ...
+    @staticmethod
+    def set_debug_level(level: int) -> None: ...
+    @staticmethod
+    def time_stamp(time_stemp: TimeStamp) -> None: ...  # NOTE(review): `time_stemp` looks like a typo of `time_stamp`; left unchanged because keyword callers may rely on it - confirm against the binding.
+
+class TimeStamp:  # Enum-like binding class with members DISABLE and ENABLE; this is the argument type of Logger.time_stamp.
+    __members__: ClassVar[dict] = ...  # read-only
+    DISABLE: ClassVar[TimeStamp] = ...
+    ENABLE: ClassVar[TimeStamp] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..36c2fea76713e980bb95eb6726d778de8c9a6583
--- /dev/null
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi
@@ -0,0 +1,436 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file __init__.pyi
+! \ingroup gpu
+! \author Henry Korb
+=======================================================================================
+"""
+from typing import Any, Callable, ClassVar, List, Optional
+
+from typing import overload
+import numpy
+import pyfluids.bindings.basics
+import pyfluids.bindings.gpu.grid_generator as grid_generator
+
+class ActuatorFarm(PreCollisionInteractor):  # PreCollisionInteractor subclass exposing all-turbine (get_all_*/set_all_*) and per-turbine (get_turbine_*/set_turbine_*) accessors for blade coordinates, velocities, forces, radii, omega, azimuth and yaw.
+    def __init__(self, number_of_blades_per_turbine: int, density: float, number_of_nodes_per_blade: int, epsilon: float, level: int, delta_t: float, delta_x: float, use_host_arrays: bool) -> None: ...
+    def add_turbine(self, posX: float, posY: float, posZ: float, diameter: float, omega: float, azimuth: float, yaw: float, bladeRadii: List[float]) -> None: ...
+    def calc_blade_forces(self) -> None: ...
+    def get_all_azimuths(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_coords_x(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_coords_x_device(self) -> int: ...  # the *_device getters return int - presumably a raw device address; TODO confirm against the binding.
+    def get_all_blade_coords_y(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_coords_y_device(self) -> int: ...
+    def get_all_blade_coords_z(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_coords_z_device(self) -> int: ...
+    def get_all_blade_forces_x(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_forces_x_device(self) -> int: ...
+    def get_all_blade_forces_y(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_forces_y_device(self) -> int: ...
+    def get_all_blade_forces_z(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_forces_z_device(self) -> int: ...
+    def get_all_blade_radii(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_radii_device(self) -> int: ...
+    def get_all_blade_velocities_x(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_velocities_x_device(self) -> int: ...
+    def get_all_blade_velocities_y(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_velocities_y_device(self) -> int: ...
+    def get_all_blade_velocities_z(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_blade_velocities_z_device(self) -> int: ...
+    def get_all_omegas(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_turbine_pos_x(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_turbine_pos_y(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_turbine_pos_z(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_all_yaws(self) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_azimuth(self, turbine: int) -> float: ...
+    def get_turbine_blade_coords_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_coords_x_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_coords_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_coords_y_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_coords_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_coords_z_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_forces_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_forces_x_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_forces_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_forces_y_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_forces_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_forces_z_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_radii(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_radii_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_velocities_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_velocities_x_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_velocities_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_velocities_y_device(self, turbine: int) -> int: ...
+    def get_turbine_blade_velocities_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_blade_velocities_z_device(self, turbine: int) -> int: ...
+    def get_turbine_omega(self, turbine: int) -> float: ...
+    def get_turbine_pos(self, turbine: int) -> numpy.ndarray[numpy.float32]: ...
+    def get_turbine_yaw(self, turbine: int) -> float: ...
+    def set_all_azimuths(self, azimuths: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_all_blade_coords(self, blade_coords_x: numpy.ndarray[numpy.float32], blade_coords_y: numpy.ndarray[numpy.float32], blade_coords_z: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_all_blade_forces(self, blade_forces_x: numpy.ndarray[numpy.float32], blade_forces_y: numpy.ndarray[numpy.float32], blade_forces_z: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_all_blade_velocities(self, blade_velocities_x: numpy.ndarray[numpy.float32], blade_velocities_y: numpy.ndarray[numpy.float32], blade_velocities_z: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_all_omegas(self, omegas: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_all_yaws(self, yaws: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_turbine_azimuth(self, turbine: int, azimuth: float) -> None: ...
+    def set_turbine_blade_coords(self, turbine: int, blade_coords_x: numpy.ndarray[numpy.float32], blade_coords_y: numpy.ndarray[numpy.float32], blade_coords_z: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_turbine_blade_forces(self, turbine: int, blade_forces_x: numpy.ndarray[numpy.float32], blade_forces_y: numpy.ndarray[numpy.float32], blade_forces_z: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_turbine_blade_velocities(self, turbine: int, blade_velocities_x: numpy.ndarray[numpy.float32], blade_velocities_y: numpy.ndarray[numpy.float32], blade_velocities_z: numpy.ndarray[numpy.float32]) -> None: ...
+    def set_turbine_omega(self, turbine: int, omega: float) -> None: ...
+    def set_turbine_yaw(self, turbine: int, yaw: float) -> None: ...
+    @property
+    def delta_t(self) -> float: ...  # the properties from here on declare getters only (no setters in this stub).
+    @property
+    def delta_x(self) -> float: ...
+    @property
+    def density(self) -> float: ...
+    @property
+    def number_of_blades_per_turbine(self) -> int: ...
+    @property
+    def number_of_indices(self) -> int: ...
+    @property
+    def number_of_nodes(self) -> int: ...
+    @property
+    def number_of_nodes_per_blade(self) -> int: ...
+    @property
+    def number_of_turbines(self) -> int: ...
+
+class BoundaryConditionFactory:  # One setter per boundary-condition kind. NOTE(review): `boundary_condition_type` is unannotated - presumably the matching *BC enum-like class from this module (NoSlipBC, SlipBC, ...); confirm.
+    def __init__(self) -> None: ...
+    def set_geometry_boundary_condition(self, boundary_condition_type) -> None: ...
+    def set_no_slip_boundary_condition(self, boundary_condition_type) -> None: ...
+    def set_precursor_boundary_condition(self, boundary_condition_type) -> None: ...
+    def set_pressure_boundary_condition(self, boundary_condition_type) -> None: ...
+    def set_slip_boundary_condition(self, boundary_condition_type) -> None: ...
+    def set_stress_boundary_condition(self, boundary_condition_type) -> None: ...
+    def set_velocity_boundary_condition(self, boundary_condition_type) -> None: ...
+
+class Communicator:  # Instances are handed out by the static get_instance(); the catch-all __init__ signature suggests direct construction is not intended - TODO confirm against the binding.
+    def __init__(self, *args, **kwargs) -> None: ...
+    @staticmethod
+    def get_instance() -> Communicator: ...
+    def get_number_of_process(self) -> int: ...
+    def get_pid(self) -> int: ...
+
+class CudaMemoryManager:  # Constructed from a Parameter; this stub declares no further methods.
+    def __init__(self, parameter: Parameter) -> None: ...
+
+class FileType:  # Enum-like binding class whose only member is VTK; this is the `type` argument of create_file_collection.
+    __members__: ClassVar[dict] = ...  # read-only
+    VTK: ClassVar[FileType] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class GridProvider:  # Instances are produced by the static make_grid_generator(), which takes a GridBuilder, a Parameter, a CudaMemoryManager and a Communicator.
+    def __init__(self, *args, **kwargs) -> None: ...
+    @staticmethod
+    def make_grid_generator(builder: grid_generator.GridBuilder, para: Parameter, cuda_memory_manager: CudaMemoryManager, communicator: Communicator) -> GridProvider: ...
+
+class GridScaling:  # Enum-like binding class with members NotSpecified, ScaleCompressible and ScaleRhoSq; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    NotSpecified: ClassVar[GridScaling] = ...
+    ScaleCompressible: ClassVar[GridScaling] = ...
+    ScaleRhoSq: ClassVar[GridScaling] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class GridScalingFactory:  # Holds the grid-scaling selection. NOTE(review): `scaling_type` is unannotated - presumably a GridScaling member; confirm against the binding.
+    def __init__(self) -> None: ...
+    def set_scaling_factory(self, scaling_type) -> None: ...
+
+class NoSlipBC:  # Enum-like binding class listing the no-slip boundary-condition variants; unlike its sibling *BC classes it declares no NotSpecified member.
+    __members__: ClassVar[dict] = ...  # read-only
+    NoSlip3rdMomentsCompressible: ClassVar[NoSlipBC] = ...
+    NoSlipBounceBack: ClassVar[NoSlipBC] = ...
+    NoSlipCompressible: ClassVar[NoSlipBC] = ...
+    NoSlipImplicitBounceBack: ClassVar[NoSlipBC] = ...
+    NoSlipIncompressible: ClassVar[NoSlipBC] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class OutputVariable:  # Enum-like binding class with members Distributions and Velocities; this is the `output_variable` argument of PrecursorWriter.
+    __members__: ClassVar[dict] = ...  # read-only
+    Distributions: ClassVar[OutputVariable] = ...
+    Velocities: ClassVar[OutputVariable] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class Parameter:  # Simulation parameter container: three constructor overloads (process count and ID with an optional ConfigurationFile, process count and ID alone, or a ConfigurationFile alone) plus get_*/set_* accessors.
+    @overload
+    def __init__(self, number_of_processes: int, my_ID: int, config_data: Optional[pyfluids.bindings.basics.ConfigurationFile]) -> None: ...
+    @overload
+    def __init__(self, number_of_processes: int, my_ID: int) -> None: ...
+    @overload
+    def __init__(self, config_data: pyfluids.bindings.basics.ConfigurationFile) -> None: ...
+    def add_actuator(self, actuator: PreCollisionInteractor) -> None: ...  # add_actuator and add_probe both accept any PreCollisionInteractor.
+    def add_probe(self, probe: PreCollisionInteractor) -> None: ...
+    def get_SGS_constant(self) -> float: ...
+    def get_density_ratio(self) -> float: ...
+    def get_force_ratio(self) -> float: ...
+    def get_is_body_force(self) -> bool: ...
+    def get_output_path(self) -> str: ...
+    def get_output_prefix(self) -> str: ...
+    def get_velocity(self) -> float: ...
+    def get_velocity_ratio(self) -> float: ...
+    def get_viscosity(self) -> float: ...
+    def get_viscosity_ratio(self) -> float: ...
+    def set_AD_kernel(self, ad_kernel: str) -> None: ...
+    def set_calc_turbulence_intensity(self, calc_velocity_and_fluctuations: bool) -> None: ...
+    def set_comp_on(self, is_comp: bool) -> None: ...
+    def set_density_ratio(self, density_ratio: float) -> None: ...
+    def set_devices(self, devices: List[int]) -> None: ...
+    def set_diff_on(self, is_diff: bool) -> None: ...
+    def set_forcing(self, forcing_x: float, forcing_y: float, forcing_z: float) -> None: ...
+    def set_has_wall_model_monitor(self, has_wall_monitor: bool) -> None: ...
+    def set_initial_condition(self, init_func: Callable[[float,float,float],List[float]]) -> None: ...  # init_func takes three floats and returns a list of floats; presumably coordinates in, initial field values out - TODO confirm.
+    def set_initial_condition_log_law(self, u_star: float, z0: float, velocity_ratio: float) -> None: ...
+    def set_initial_condition_perturbed_log_law(self, u_star: float, z0: float, length_x: float, length_z: float, height: float, velocity_ratio: float) -> None: ...
+    def set_initial_condition_uniform(self, velocity_x: float, velocity_y: float, velocity_z: float) -> None: ...
+    def set_is_body_force(self, is_body_force: bool) -> None: ...
+    def set_main_kernel(self, kernel: str) -> None: ...
+    def set_max_dev(self, max_dev: int) -> None: ...
+    def set_max_level(self, number_of_levels: int) -> None: ...
+    def set_outflow_pressure_correction_factor(self, correction_factor: float) -> None: ...
+    def set_output_path(self, o_path: str) -> None: ...
+    def set_output_prefix(self, o_prefix: str) -> None: ...
+    def set_print_files(self, print_files: bool) -> None: ...
+    def set_quadric_limiters(self, quadric_limiter_p: float, quadric_limiter_m: float, quadric_limiter_d: float) -> None: ...
+    def set_temperature_BC(self, temp_bc: float) -> None: ...
+    def set_temperature_init(self, temp: float) -> None: ...
+    def set_timestep_end(self, tend: int) -> None: ...
+    def set_timestep_of_coarse_level(self, timestep: int) -> None: ...
+    def set_timestep_out(self, tout: int) -> None: ...
+    def set_timestep_start_out(self, t_start_out: int) -> None: ...
+    def set_use_streams(self, use_streams: bool) -> None: ...
+    def set_velocity_LB(self, velocity: float) -> None: ...
+    def set_velocity_ratio(self, velocity_ratio: float) -> None: ...
+    def set_viscosity_LB(self, viscosity: float) -> None: ...
+    def set_viscosity_ratio(self, viscosity_ratio: float) -> None: ...
+
+class PreCollisionInteractor:  # Base class of ActuatorFarm and PrecursorWriter; also the argument type of Parameter.add_actuator and Parameter.add_probe.
+    def __init__(self, *args, **kwargs) -> None: ...
+
+class PrecursorBC:  # Enum-like binding class with members DistributionsPrecursor, NotSpecified and VelocityPrecursor; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    DistributionsPrecursor: ClassVar[PrecursorBC] = ...
+    NotSpecified: ClassVar[PrecursorBC] = ...
+    VelocityPrecursor: ClassVar[PrecursorBC] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class PrecursorWriter(PreCollisionInteractor):  # PreCollisionInteractor subclass configured entirely through its constructor: file name and output path, an x position with y/z bounds, output timing, an OutputVariable and a per-file timestep limit.
+    def __init__(self, filename: str, output_path: str, x_pos: float, y_min: float, y_max: float, z_min: float, z_max: float, t_start_out: int, t_save: int, output_variable: OutputVariable, max_timesteps_per_file: int) -> None: ...
+
+class PressureBC:  # Enum-like binding class listing the pressure/outflow boundary-condition variants, including NotSpecified; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    NotSpecified: ClassVar[PressureBC] = ...
+    OutflowNonReflective: ClassVar[PressureBC] = ...
+    OutflowNonReflectivePressureCorrection: ClassVar[PressureBC] = ...
+    PressureEquilibrium: ClassVar[PressureBC] = ...
+    PressureEquilibrium2: ClassVar[PressureBC] = ...
+    PressureNonEquilibriumCompressible: ClassVar[PressureBC] = ...
+    PressureNonEquilibriumIncompressible: ClassVar[PressureBC] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class SideType:  # Enum-like binding class with members GEOMETRY, MX, MY, MZ, PX, PY and PZ; presumably M*/P* denote the minus/plus side along each axis - TODO confirm.
+    __members__: ClassVar[dict] = ...  # read-only
+    GEOMETRY: ClassVar[SideType] = ...
+    MX: ClassVar[SideType] = ...
+    MY: ClassVar[SideType] = ...
+    MZ: ClassVar[SideType] = ...
+    PX: ClassVar[SideType] = ...
+    PY: ClassVar[SideType] = ...
+    PZ: ClassVar[SideType] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class Simulation:  # Three constructor overloads differing only in the optional factories, plus run(). NOTE(review): `communicator` is unannotated in every overload - presumably Communicator; confirm against the binding.
+    @overload
+    def __init__(self, parameter: Parameter, memoryManager: CudaMemoryManager, communicator, gridProvider: GridProvider, bcFactory: BoundaryConditionFactory, gridScalingFactory: GridScalingFactory) -> None: ...
+    @overload
+    def __init__(self, parameter: Parameter, memoryManager: CudaMemoryManager, communicator, gridProvider: GridProvider, bcFactory: BoundaryConditionFactory) -> None: ...
+    @overload
+    def __init__(self, parameter: Parameter, memoryManager: CudaMemoryManager, communicator, gridProvider: GridProvider, bcFactory: BoundaryConditionFactory, tmFactory: TurbulenceModelFactory, gridScalingFactory: GridScalingFactory) -> None: ...
+    def addEnstrophyAnalyzer(self, t_analyse: int) -> None: ...  # the two add*Analyzer methods are camelCase, unlike the snake_case names used elsewhere in this module.
+    def addKineticEnergyAnalyzer(self, t_analyse: int) -> None: ...
+    def run(self) -> None: ...
+
+class SlipBC:  # Enum-like binding class listing the slip boundary-condition variants, including NotSpecified; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    NotSpecified: ClassVar[SlipBC] = ...
+    SlipBounceBack: ClassVar[SlipBC] = ...
+    SlipCompressible: ClassVar[SlipBC] = ...
+    SlipCompressibleTurbulentViscosity: ClassVar[SlipBC] = ...
+    SlipIncompressible: ClassVar[SlipBC] = ...
+    SlipPressureCompressibleTurbulentViscosity: ClassVar[SlipBC] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class StressBC:  # Enum-like binding class with members NotSpecified, StressBounceBack, StressCompressible and StressPressureBounceBack; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    NotSpecified: ClassVar[StressBC] = ...
+    StressBounceBack: ClassVar[StressBC] = ...
+    StressCompressible: ClassVar[StressBC] = ...
+    StressPressureBounceBack: ClassVar[StressBC] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class TurbulenceModel:  # Enum-like binding class with members AMD, NONE, QR and Smagorinsky; this is the argument type of TurbulenceModelFactory.set_turbulence_model.
+    __members__: ClassVar[dict] = ...  # read-only
+    AMD: ClassVar[TurbulenceModel] = ...
+    NONE: ClassVar[TurbulenceModel] = ...
+    QR: ClassVar[TurbulenceModel] = ...
+    Smagorinsky: ClassVar[TurbulenceModel] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class TurbulenceModelFactory:  # Built from a Parameter; the model and its constant are set explicitly (set_turbulence_model, set_model_constant) or via read_config_file with a ConfigurationFile.
+    def __init__(self, para: Parameter) -> None: ...
+    def read_config_file(self, config_data: pyfluids.bindings.basics.ConfigurationFile) -> None: ...
+    def set_model_constant(self, model_constant: float) -> None: ...
+    def set_turbulence_model(self, turbulence_model: TurbulenceModel) -> None: ...
+
+class VTKFileCollection(FileCollection):  # FileCollection subclass constructed from a string prefix; the base class is declared later in this stub, which is permitted in .pyi files.
+    def __init__(self, prefix: str) -> None: ...
+
+class VelocityBC:  # Enum-like binding class listing the velocity boundary-condition variants, including NotSpecified; constructible from an int and convertible back through __int__/__index__.
+    __members__: ClassVar[dict] = ...  # read-only
+    NotSpecified: ClassVar[VelocityBC] = ...
+    VelocityAndPressureCompressible: ClassVar[VelocityBC] = ...
+    VelocityCompressible: ClassVar[VelocityBC] = ...
+    VelocityIncompressible: ClassVar[VelocityBC] = ...
+    VelocitySimpleBounceBackCompressible: ClassVar[VelocityBC] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class FileCollection:  # Base class of VTKFileCollection and return type of create_file_collection; the stub declares only a catch-all constructor.
+    def __init__(self, *args, **kwargs) -> None: ...
+
+def create_file_collection(prefix: str, type: FileType) -> FileCollection: ...  # Module-level factory: returns a FileCollection for the given prefix and FileType.
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..8d715e4b4cd49e6dbf92da3aedddbc4b869067c4
--- /dev/null
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
@@ -0,0 +1,100 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file grid_generator.pyi
+! \ingroup gpu
+! \author Henry Korb
+=======================================================================================
+"""
+from typing import Any, List
+
+from typing import overload
+import pyfluids.bindings.basics
+import pyfluids.bindings.gpu
+
+class BoundingBox:  # NOTE: arguments are interleaved min/max per axis (min_x, max_x, ...), unlike Cuboid (all minima, then all maxima)
+    def __init__(self, min_x: float, max_x: float, min_y: float, max_y: float, min_z: float, max_z: float) -> None: ...
+
+class Conglomerate(Object):  # Object to which other Objects can be add()-ed or subtract()-ed; instances come from the static make_shared()
+    def __init__(self, *args, **kwargs) -> None: ...
+    def add(self, object: Object) -> None: ...
+    @staticmethod
+    def make_shared() -> Conglomerate: ...
+    def subtract(self, object: Object) -> None: ...
+
+class Cuboid(Object):  # NOTE: takes the three minima first, then the three maxima (contrast BoundingBox, which interleaves min/max per axis)
+    def __init__(self, min_x1: float, min_x2: float, min_x3: float, max_x1: float, max_x2: float, max_x3: float) -> None: ...
+
+class GridBuilder:  # base class of LevelGridBuilder in this stub
+    def __init__(self, *args, **kwargs) -> None: ...
+    def get_number_of_grid_levels(self) -> int: ...
+
+class GridFactory:  # instances come from the static make(); a GridFactory is the argument of MultipleGridBuilder.make_shared()
+    def __init__(self, *args, **kwargs) -> None: ...
+    @staticmethod
+    def make() -> GridFactory: ...
+
+class LevelGridBuilder(GridBuilder):  # GridBuilder subclass whose setters assign boundary conditions, most of them per SideType
+    def __init__(self, *args, **kwargs) -> None: ...
+    def set_no_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType) -> None: ...
+    def set_periodic_boundary_condition(self, periodic_x: bool, periodic_y: bool, periodic_z: bool) -> None: ...
+    def set_precursor_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, file_collection: pyfluids.bindings.gpu.FileCollection, n_t_read: int, velocity_x: float = ..., velocity_y: float = ..., velocity_z: float = ..., file_level_to_grid_level_map: List[int] = ...) -> None: ...  # FileCollection is the class gpu.pyi declares (base of VTKFileCollection, returned by create_file_collection)
+    def set_pressure_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, rho: float) -> None: ...
+    def set_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float) -> None: ...
+    def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float) -> None: ...
+    def set_velocity_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, vx: float, vy: float, vz: float) -> None: ...
+
+class MultipleGridBuilder(LevelGridBuilder):  # add_geometry and add_grid each have two overloads: with and without an explicit level argument
+    def __init__(self, *args, **kwargs) -> None: ...
+    def add_coarse_grid(self, start_x: float, start_y: float, start_z: float, end_x: float, end_y: float, end_z: float, delta: float) -> None: ...
+    @overload
+    def add_geometry(self, solid_object: Object) -> None: ...
+    @overload
+    def add_geometry(self, solid_object: Object, level: int) -> None: ...
+    @overload
+    def add_grid(self, grid_shape: Object) -> None: ...
+    @overload
+    def add_grid(self, grid_shape: Object, level_fine: int) -> None: ...
+    def build_grids(self, lbm_or_gks: pyfluids.bindings.basics.LbmOrGks, enable_thin_walls: bool) -> None: ...
+    def get_number_of_levels(self) -> int: ...
+    @staticmethod
+    def make_shared(grid_factory: GridFactory) -> MultipleGridBuilder: ...  # instances come from this static factory, given a GridFactory
+
+class Object:  # common base of Conglomerate, Cuboid, Sphere and TriangularMesh in this stub
+    def __init__(self, *args, **kwargs) -> None: ...
+
+class Sphere(Object):  # Object subclass; instances come from the static make_shared()
+    def __init__(self, *args, **kwargs) -> None: ...
+    @staticmethod
+    def make_shared() -> Sphere: ...
+
+class TriangularMesh(Object):  # Object subclass; instances come from the static make()
+    def __init__(self, *args, **kwargs) -> None: ...
+    @staticmethod
+    def make() -> TriangularMesh: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..af9c40078e6009efebda4450b5c5e23586aa1e83
--- /dev/null
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi
@@ -0,0 +1,85 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file probes.pyi
+! \ingroup gpu
+! \author Henry Korb
+=======================================================================================
+"""
+from typing import ClassVar, List
+
+import pyfluids.bindings.gpu
+
+class PlanarAverageProbe(Probe):  # Probe subclass; unlike PlaneProbe its constructor also takes t_start_tmp_avg and a plane_normal string
+    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_start_tmp_avg: int, t_avg: int, t_start_out: int, t_out: int, plane_normal: str) -> None: ...
+
+class PlaneProbe(Probe):  # Probe subclass; the plane is given separately via set_probe_plane(position, deltas)
+    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int) -> None: ...
+    def set_probe_plane(self, pos_x: float, pos_y: float, pos_z: float, delta_x: float, delta_y: float, delta_z: float) -> None: ...
+
+class PointProbe(Probe):  # Probe subclass; points are added from three coordinate lists or from an x-normal plane description
+    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int, output_timeseries: bool) -> None: ...
+    def add_probe_points_from_list(self, point_coords_x: List[float], point_coords_y: List[float], point_coords_z: List[float]) -> None: ...
+    def add_probe_points_from_x_normal_plane(self, pos_x: float, pos0_y: float, pos0_z: float, pos1_y: float, pos1_z: float, n_y: int, n_z: int) -> None: ...
+
+class Probe(pyfluids.bindings.gpu.PreCollisionInteractor):  # base of the probe classes in this stub; statistics are selected via the Statistic enum
+    def __init__(self, *args, **kwargs) -> None: ...
+    def add_all_available_statistics(self) -> None: ...
+    def add_statistic(self, variable: Statistic) -> None: ...
+    def set_file_name_to_n_out(self) -> None: ...
+
+class Statistic:  # enum-style class: each member is a class attribute of type Statistic; accepted by Probe.add_statistic()
+    __members__: ClassVar[dict] = ...  # read-only
+    Instantaneous: ClassVar[Statistic] = ...
+    Means: ClassVar[Statistic] = ...
+    SpatialCovariances: ClassVar[Statistic] = ...
+    SpatialFlatness: ClassVar[Statistic] = ...
+    SpatialMeans: ClassVar[Statistic] = ...
+    SpatialSkewness: ClassVar[Statistic] = ...
+    SpatioTemporalCovariances: ClassVar[Statistic] = ...
+    SpatioTemporalFlatness: ClassVar[Statistic] = ...
+    SpatioTemporalMeans: ClassVar[Statistic] = ...
+    SpatioTemporalSkewness: ClassVar[Statistic] = ...
+    Variances: ClassVar[Statistic] = ...
+    __entries: ClassVar[dict] = ...
+    def __init__(self, arg0: int) -> None: ...
+    def __eq__(self, arg0: object) -> bool: ...
+    def __getstate__(self) -> int: ...
+    def __hash__(self) -> int: ...
+    def __index__(self) -> int: ...
+    def __int__(self) -> int: ...
+    def __ne__(self, arg0: object) -> bool: ...
+    def __setstate__(self, arg0: int) -> None: ...
+    @property
+    def name(self) -> str: ...
+
+class WallModelProbe(Probe):  # Probe subclass; like PlanarAverageProbe its constructor takes t_start_tmp_avg, plus two boolean output switches below
+    def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_start_tmp_avg: int, t_avg: int, t_start_out: int, t_out: int) -> None: ...
+    def set_evaluate_pressure_gradient(self, eval_press_grad: bool) -> None: ...
+    def set_force_output_to_stress(self, output_stress: bool) -> None: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/lbm.pyi b/pythonbindings/pyfluids-stubs/bindings/lbm.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pythonbindings/pyfluids-stubs/bindings/logger.pyi b/pythonbindings/pyfluids-stubs/bindings/logger.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..fe84eeb18f3245ef72ed023b2de9db7b9131d144
--- /dev/null
+++ b/pythonbindings/pyfluids-stubs/bindings/logger.pyi
@@ -0,0 +1,45 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file logger.pyi
+! \ingroup bindings
+! \author Henry Korb
+=======================================================================================
+"""
+class Logger:  # both methods are static, so no Logger instance is needed to call them
+    @staticmethod
+    def change_log_path(path: str) -> None: ...
+    @staticmethod
+    def initialize_logger() -> None: ...
+
+def vf_log_critical(message: str) -> None: ...  # module-level logging helpers (critical/debug/info/trace/warning); each takes the message string
+def vf_log_debug(message: str) -> None: ...
+def vf_log_info(message: str) -> None: ...
+def vf_log_trace(message: str) -> None: ...
+def vf_log_warning(message: str) -> None: ...
diff --git a/pythonbindings/pyfluids/__init__.py b/pythonbindings/pyfluids/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0537b758267e22a72e5030340de7b87d52f35c3
--- /dev/null
+++ b/pythonbindings/pyfluids/__init__.py
@@ -0,0 +1,54 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file __init__.py
+! \ingroup pyfluids
+! \author Henry Korb
+=======================================================================================
+"""
+try:  # every submodule is imported best-effort: an ImportError is reported on stdout instead of failing `import pyfluids`
+    from .bindings import basics
+except ImportError:
+    print("Basics bindings not included")
+try:
+    from .bindings import logger
+except ImportError:
+    print("Logger bindings not included")
+try:
+    from .bindings import lbm
+except ImportError:
+    print("LBM bindings not included")
+try:  # NOTE(review): presumably only present when the extension is compiled with VF_GPU_PYTHONBINDINGS (see pythonbindings/src/VirtualFluids.cpp) - confirm
+    from .bindings import gpu
+except ImportError:
+    print("GPU bindings not included")
+try:  # NOTE(review): presumably only present when the extension is compiled with VF_CPU_PYTHONBINDINGS (see pythonbindings/src/VirtualFluids.cpp) - confirm
+    from .bindings import cpu
+except ImportError:
+    print("CPU bindings not included")
\ No newline at end of file
diff --git a/pythonbindings/pyfluids/py.typed b/pythonbindings/pyfluids/py.typed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pythonbindings/pymuparser/__init__.py b/pythonbindings/pymuparser/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..398069bcba03b3fe710d9d9a6398e9c530b19ee9
--- /dev/null
+++ b/pythonbindings/pymuparser/__init__.py
@@ -0,0 +1,38 @@
+r"""
+=======================================================================================
+ ____          ____    __    ______     __________   __      __       __        __
+ \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+      \    \  |    |   ________________________________________________________________
+       \    \ |    |  |  ______________________________________________________________|
+        \    \|    |  |  |         __          __     __     __     ______      _______
+         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+
+  This file is part of VirtualFluids. VirtualFluids is free software: you can
+  redistribute it and/or modify it under the terms of the GNU General Public
+  License as published by the Free Software Foundation, either version 3 of
+  the License, or (at your option) any later version.
+
+  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+
+! \file __init__.py
+! \ingroup pymuparser
+! \author Henry Korb
+=======================================================================================
+"""
+try:  # re-export Parser from the package-local `bindings` module
+    from .bindings import Parser
+except ImportError as e:
+    raise ImportError("Pymuparser bindings were not built. Only included if VirtualFluids is built with VF_BUILD_CPU=ON.") from e  # chain explicitly so the underlying import failure is kept as __cause__
\ No newline at end of file
diff --git a/pythonbindings/src/VirtualFluids.cpp b/pythonbindings/src/VirtualFluids.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..20e5012e0af325440e502c704d6f372100306ab1
--- /dev/null
+++ b/pythonbindings/src/VirtualFluids.cpp
@@ -0,0 +1,63 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file VirtualFluids.cpp
+//! \ingroup src
+//! \author Henry Korb
+//=======================================================================================
+#include <pybind11/pybind11.h>
+#include "basics/basics.cpp"
+#include "lbm/lbm.cpp"
+#include "logger/logger.cpp"
+
+#ifdef VF_GPU_PYTHONBINDINGS
+#include "gpu/gpu.cpp"
+#endif
+#ifdef VF_CPU_PYTHONBINDINGS
+#include "cpu/cpu.cpp"
+#endif
+
+
+namespace py_bindings
+{
+    namespace py = pybind11;
+
+    PYBIND11_MODULE(bindings, m) // the extension module is named "bindings"; pyfluids/__init__.py imports from `.bindings`
+    {
+        py::add_ostream_redirect(m, "ostream_redirect"); // NOTE(review): declared in <pybind11/iostream.h>, which this file does not include directly - confirm an included .cpp provides it in every build configuration
+        basics::makeModule(m); // basics, lbm and logging are registered unconditionally
+        lbm::makeModule(m);
+        logging::makeModule(m);
+#ifdef VF_GPU_PYTHONBINDINGS
+        gpu::makeModule(m); // compiled in only when VF_GPU_PYTHONBINDINGS is defined
+#endif
+#ifdef VF_CPU_PYTHONBINDINGS
+        cpu::makeModule(m); // compiled in only when VF_CPU_PYTHONBINDINGS is defined
+#endif
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/VirtualFluidsModulesCPU.cpp b/pythonbindings/src/VirtualFluidsModulesCPU.cpp
deleted file mode 100644
index 2fba3da494f568f7d0d0a117a579a45c9c1b9245..0000000000000000000000000000000000000000
--- a/pythonbindings/src/VirtualFluidsModulesCPU.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <pybind11/pybind11.h>
-#include "cpu/cpu.cpp"
-
-namespace py_bindings
-{
-    namespace py = pybind11;
-
-    PYBIND11_MODULE(pyfluids, m)
-    {
-        cpu::makeModule(m);
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/VirtualFluidsModulesGPU.cpp b/pythonbindings/src/VirtualFluidsModulesGPU.cpp
deleted file mode 100644
index b96971caf381faada76ee676cf60469492d055c2..0000000000000000000000000000000000000000
--- a/pythonbindings/src/VirtualFluidsModulesGPU.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <pybind11/pybind11.h>
-#include "basics/basics.cpp"
-#include "lbm/lbm.cpp"
-#include "gpu/gpu.cpp"
-#include "logger/logger.cpp"
-
-namespace py_bindings
-{
-    namespace py = pybind11;
-
-    PYBIND11_MODULE(pyfluids, m)
-    {
-        basics::makeModule(m);
-        gpu::makeModule(m);
-        lbm::makeModule(m);
-        logging::makeModule(m);
-        py::add_ostream_redirect(m, "ostream_redirect");
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/basics/basics.cpp b/pythonbindings/src/basics/basics.cpp
index 381e345d78226b25ec3a77a14340d2ef1171c8c9..e67dfb05308511c8bf79d7e860299f062f317194 100644
--- a/pythonbindings/src/basics/basics.cpp
+++ b/pythonbindings/src/basics/basics.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file basics.cpp
+//! \ingroup basics
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include "submodules/logger.cpp"
 #include "submodules/configuration_file.cpp"
diff --git a/pythonbindings/src/basics/submodules/configuration_file.cpp b/pythonbindings/src/basics/submodules/configuration_file.cpp
index f5a2f87135a17f5eda34a7467d95f9db6b1c21d1..7fcd48c34824b9370eeac1872c899bf980176a52 100644
--- a/pythonbindings/src/basics/submodules/configuration_file.cpp
+++ b/pythonbindings/src/basics/submodules/configuration_file.cpp
@@ -1,5 +1,37 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file configuration_file.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
-#include <basics/config/ConfigurationFile.h>
+#include "basics/config/ConfigurationFile.h"
 
 namespace configuration
 {
@@ -9,6 +41,19 @@ namespace configuration
     {
         py::class_<vf::basics::ConfigurationFile>(parentModule, "ConfigurationFile")
         .def(py::init<>())
-        .def("load", &vf::basics::ConfigurationFile::load);
+        .def("load", &vf::basics::ConfigurationFile::load, py::arg("file"))
+        .def("contains", &vf::basics::ConfigurationFile::contains, py::arg("key"))
+        .def("get_int_value"   , static_cast<int         (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"))
+        .def("get_int_value"   , static_cast<int         (vf::basics::ConfigurationFile::*)(const std::string&, int        ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value"))
+        .def("get_uint_value"  , static_cast<uint        (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"))
+        .def("get_uint_value"  , static_cast<uint        (vf::basics::ConfigurationFile::*)(const std::string&, uint       ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value"))
+        .def("get_float_value" , static_cast<float       (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"))
+        .def("get_float_value" , static_cast<float       (vf::basics::ConfigurationFile::*)(const std::string&, float      ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value"))
+        .def("get_double_value", static_cast<double      (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"))
+        .def("get_double_value", static_cast<double      (vf::basics::ConfigurationFile::*)(const std::string&, double     ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value"))
+        .def("get_bool_value"  , static_cast<bool        (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"))
+        .def("get_bool_value"  , static_cast<bool        (vf::basics::ConfigurationFile::*)(const std::string&, bool       ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value"))
+        .def("get_string_value", static_cast<std::string (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"))
+        .def("get_string_value", static_cast<std::string (vf::basics::ConfigurationFile::*)(const std::string&, std::string) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value"));
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/basics/submodules/lbm_or_gks.cpp b/pythonbindings/src/basics/submodules/lbm_or_gks.cpp
index ed1deeca62fc57b7f44499b306e9f99b7f990604..d20cf2d1f631f6d36a80c36f1fb6c9c59d192090 100644
--- a/pythonbindings/src/basics/submodules/lbm_or_gks.cpp
+++ b/pythonbindings/src/basics/submodules/lbm_or_gks.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file lbm_or_gks.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include "basics/Core/LbmOrGks.h"
 
diff --git a/pythonbindings/src/basics/submodules/logger.cpp b/pythonbindings/src/basics/submodules/logger.cpp
index d46648e349b44243581e083f3561e8a13648f3b2..fa7e00e4dca06581b7a14d2bcf2628ed6af60001 100644
--- a/pythonbindings/src/basics/submodules/logger.cpp
+++ b/pythonbindings/src/basics/submodules/logger.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file logger.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <pybind11/iostream.h>
 #include <basics/Core/Logger/Logger.h>
@@ -12,12 +44,12 @@ namespace logger
         py::module loggerModule = parentModule.def_submodule("logger");
 
         py::class_<logging::Logger>(loggerModule, "Logger")
-        .def("add_stdout", [](){
+        .def_static("add_stdout", [](){
             logging::Logger::addStream(&std::cout);
         })
-        .def("set_debug_level", &logging::Logger::setDebugLevel)
-        .def("time_stamp", &logging::Logger::timeStamp)
-        .def("enable_printed_rank_numbers", &logging::Logger::enablePrintedRankNumbers);
+        .def_static("set_debug_level", &logging::Logger::setDebugLevel)
+        .def_static("time_stamp", &logging::Logger::timeStamp, py::arg("time_stamp"))
+        .def_static("enable_printed_rank_numbers", &logging::Logger::enablePrintedRankNumbers, py::arg("print"));
 
         loggerModule.attr("log") = logging::out;
         py::enum_<logging::Logger::Level>(loggerModule, "Level")
diff --git a/pythonbindings/src/cpu/cpu.cpp b/pythonbindings/src/cpu/cpu.cpp
index 554de53b47446366693aed31d534f6145ebea8ba..75143d913596c74a26f25ce64f1e6d214a442e34 100644
--- a/pythonbindings/src/cpu/cpu.cpp
+++ b/pythonbindings/src/cpu/cpu.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file cpu.cpp
+//! \ingroup cpu
+//! \author Sven Marcus, Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include "submodules/boundaryconditions.cpp"
 #include "submodules/simulationconfig.cpp"
diff --git a/pythonbindings/src/cpu/submodules/boundaryconditions.cpp b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
index 3bff7bc069ca20fe1c0cf3d1847b9714e0381505..ac9ec8605dec51e8374c850b1c1b58314674c426 100644
--- a/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
+++ b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file boundaryconditions.cpp
+//! \ingroup submodules
+//! \author Sven Marcus, Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <BoundaryConditions/DensityBCAdapter.h>
diff --git a/pythonbindings/src/cpu/submodules/geometry.cpp b/pythonbindings/src/cpu/submodules/geometry.cpp
index b7ff4dd761258d41687589d2dd89c3479093753e..4c4c47b002b9c7451a8d788ba82c4a19b78ca96f 100644
--- a/pythonbindings/src/cpu/submodules/geometry.cpp
+++ b/pythonbindings/src/cpu/submodules/geometry.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file geometry.cpp
+//! \ingroup submodules
+//! \author Sven Marcus, Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <geometry3d/GbPoint3D.h>
 #include <geometry3d/GbObject3D.h>
diff --git a/pythonbindings/src/cpu/submodules/kernel.cpp b/pythonbindings/src/cpu/submodules/kernel.cpp
index fb291790632cc2041410f60a14fca8d966283343..b00d86579540a299e4bf3ed47bc09d4386f420a2 100644
--- a/pythonbindings/src/cpu/submodules/kernel.cpp
+++ b/pythonbindings/src/cpu/submodules/kernel.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file kernel.cpp
+//! \ingroup submodules
+//! \author Sven Marcus, Henry Korb
+//=======================================================================================
 #include <memory>
 #include <pybind11/pybind11.h>
 #include <simulationconfig/KernelFactory.h>
diff --git a/pythonbindings/src/cpu/submodules/simulationconfig.cpp b/pythonbindings/src/cpu/submodules/simulationconfig.cpp
index 60af4e36af4dca67e9262dd9f5ee1f46d5b7bb58..09d91f44e85f03c6150c56ce5762e7629212fba0 100644
--- a/pythonbindings/src/cpu/submodules/simulationconfig.cpp
+++ b/pythonbindings/src/cpu/submodules/simulationconfig.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file simulationconfig.cpp
+//! \ingroup submodules
+//! \author Sven Marcus, Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <simulationconfig/Simulation.h>
 
diff --git a/pythonbindings/src/cpu/submodules/simulationparameters.cpp b/pythonbindings/src/cpu/submodules/simulationparameters.cpp
index acc272f2ee412cfbafd9007b4b18610cfd0a1e9b..b33d20f9e5d335a0ed381faf8786d88cc7642738 100644
--- a/pythonbindings/src/cpu/submodules/simulationparameters.cpp
+++ b/pythonbindings/src/cpu/submodules/simulationparameters.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file simulationparameters.cpp
+//! \ingroup submodules
+//! \author Sven Marcus, Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <complex>
diff --git a/pythonbindings/src/cpu/submodules/writer.cpp b/pythonbindings/src/cpu/submodules/writer.cpp
index d5ec527a27caf63d9a3066c51e1f675b307fe0b2..f1cfd8934c2da84266a93d5bcd91eb26f5f69d3f 100644
--- a/pythonbindings/src/cpu/submodules/writer.cpp
+++ b/pythonbindings/src/cpu/submodules/writer.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file writer.cpp
+//! \ingroup submodules
+//! \author Sven Marcus, Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <simulationconfig/WriterConfiguration.h>
 
diff --git a/pythonbindings/src/gpu/gpu.cpp b/pythonbindings/src/gpu/gpu.cpp
index dc110cd5e19a9aad4937f9c2133ddf74c0ddf9bf..9eb160ae7765f16a6437e343cb878bb4b80877bf 100644
--- a/pythonbindings/src/gpu/gpu.cpp
+++ b/pythonbindings/src/gpu/gpu.cpp
@@ -1,14 +1,50 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file gpu.cpp
+//! \ingroup gpu
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
-#include "submodules/actuator_line.cpp"
 #include "submodules/pre_collision_interactor.cpp"
 #include "submodules/simulation.cpp"
 #include "submodules/parameter.cpp"
 #include "submodules/boundary_conditions.cpp"
 #include "submodules/communicator.cpp"
 #include "submodules/cuda_memory_manager.cpp"
+#include "submodules/probes.cpp"
+#include "submodules/precursor_writer.cpp"
 #include "submodules/grid_provider.cpp"
 #include "submodules/grid_generator.cpp"
-#include "submodules/probes.cpp"
+#include "submodules/turbulence_models.cpp"
+#include "submodules/transient_bc_setter.cpp"
+#include "submodules/actuator_farm.cpp"
+#include "submodules/grid_scaling_factory.cpp"
 
 namespace gpu
 {
@@ -20,13 +56,17 @@ namespace gpu
         simulation::makeModule(gpuModule);
         parameter::makeModule(gpuModule);
         pre_collision_interactor::makeModule(gpuModule);
-        actuator_line::makeModule(gpuModule);
+        actuator_farm::makeModule(gpuModule);
         boundary_conditions::makeModule(gpuModule);
+        transient_bc_setter::makeModule(gpuModule);
         communicator::makeModule(gpuModule); 
         cuda_memory_manager::makeModule(gpuModule);
-        grid_provider::makeModule(gpuModule);
         probes::makeModule(gpuModule);
+        precursor_writer::makeModule(gpuModule);
         grid_generator::makeModule(gpuModule);
+        grid_provider::makeModule(gpuModule);
+        turbulence_model::makeModule(gpuModule);
+        grid_scaling_factory::makeModule(gpuModule);
         return gpuModule;
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/actuator_farm.cpp b/pythonbindings/src/gpu/submodules/actuator_farm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a930616db3e0d0713bdf57157387d75d171603de
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/actuator_farm.cpp
@@ -0,0 +1,171 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file actuator_farm.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
+class PyActuatorFarm : public ActuatorFarm // pybind11 trampoline: lets a Python subclass override calcBladeForces
+{
+public:
+    using ActuatorFarm::ActuatorFarm; // Inherit constructors so py::init<...> can construct the trampoline directly
+    void calcBladeForces() override // looks up a Python method named "calc_blade_forces"; falls back to ActuatorFarm::calcBladeForces
+    { 
+        PYBIND11_OVERRIDE_NAME(void, ActuatorFarm, "calc_blade_forces", calcBladeForces); 
+    }
+};
+namespace actuator_farm
+{
+    namespace py = pybind11;
+    //! Binds ActuatorFarm (with the PyActuatorFarm trampoline) as "ActuatorFarm" on parentModule.
+    void makeModule(py::module_ &parentModule)
+    {
+        using arr = py::array_t<float, py::array::c_style>;
+        // arr(shape, ptr) copies from ptr on the host, so only host pointers may be wrapped in an arr.
+        py::class_<ActuatorFarm, PreCollisionInteractor, PyActuatorFarm, std::shared_ptr<ActuatorFarm>>(parentModule, "ActuatorFarm", py::dynamic_attr())
+        .def(py::init<  const uint,
+                        const real,
+                        const uint,
+                        const real,
+                        int,
+                        const real,
+                        const real,
+                        const bool>(), 
+                        py::arg("number_of_blades_per_turbine"), 
+                        py::arg("density"), 
+                        py::arg("number_of_nodes_per_blade"), 
+                        py::arg("epsilon"),
+                        py::arg("level"), 
+                        py::arg("delta_t"), 
+                        py::arg("delta_x"),
+                        py::arg("use_host_arrays"))
+        .def_property_readonly("number_of_turbines", &ActuatorFarm::getNumberOfTurbines)
+        .def_property_readonly("number_of_nodes_per_blade", &ActuatorFarm::getNumberOfNodesPerBlade)
+        .def_property_readonly("number_of_blades_per_turbine", &ActuatorFarm::getNumberOfBladesPerTurbine)
+        .def_property_readonly("number_of_nodes", &ActuatorFarm::getNumberOfNodes)
+        .def_property_readonly("number_of_indices", &ActuatorFarm::getNumberOfIndices)
+        .def_property_readonly("density", &ActuatorFarm::getDensity)
+        .def_property_readonly("delta_t", &ActuatorFarm::getDeltaT)
+        .def_property_readonly("delta_x", &ActuatorFarm::getDeltaX)
+
+        .def("add_turbine", &ActuatorFarm::addTurbine, py::arg("posX"), py::arg("posY"), py::arg("posZ"), py::arg("diameter"), py::arg("omega"), py::arg("azimuth"), py::arg("yaw"), py::arg("bladeRadii"))
+        // Per-turbine scalars and farm-wide 1D arrays of length number_of_turbines (returned as copies).
+        .def("get_turbine_pos", [](ActuatorFarm& al, uint turbine){ real position[3] = {al.getTurbinePosX(turbine), al.getTurbinePosY(turbine), al.getTurbinePosZ(turbine)}; return arr(3,  position); }, py::arg("turbine"))
+        .def("get_turbine_azimuth", &ActuatorFarm::getTurbineAzimuth, py::arg("turbine"))
+        .def("get_turbine_yaw", &ActuatorFarm::getTurbineYaw, py::arg("turbine"))
+        .def("get_turbine_omega", &ActuatorFarm::getTurbineOmega, py::arg("turbine"))
+        .def("get_all_azimuths", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllAzimuths()); } )
+        .def("get_all_yaws", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllYaws()); } )
+        .def("get_all_omegas", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllOmegas()); } )
+        .def("get_all_turbine_pos_x", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllTurbinePosX()); } )
+        .def("get_all_turbine_pos_y", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllTurbinePosY()); } )
+        .def("get_all_turbine_pos_z", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllTurbinePosZ()); } )
+        // Farm-wide blade arrays: radii are shaped (turbines, nodes), all others (turbines, blades, nodes), row-major.
+        .def("get_all_blade_radii", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfNodesPerBlade()}, al.getAllBladeRadii()); } )
+        .def("get_all_blade_coords_x", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsX()); } )
+        .def("get_all_blade_coords_y", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsY()); } )
+        .def("get_all_blade_coords_z", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsZ()); } )        
+        .def("get_all_blade_velocities_x", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesX()); } )
+        .def("get_all_blade_velocities_y", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesY()); } )
+        .def("get_all_blade_velocities_z", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesZ()); } )
+        .def("get_all_blade_forces_x", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesX()); } )
+        .def("get_all_blade_forces_y", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesY()); } )
+        .def("get_all_blade_forces_z", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesZ()); } )
+        // Per-turbine host copies: slice the farm-wide host arrays at the turbine's row-major offset (the *Device getters must not be read on the host). No range check on turbine.
+        .def("get_turbine_blade_radii", [](ActuatorFarm& al, uint turbine){ return arr(al.getNumberOfNodesPerBlade(), al.getAllBladeRadii() + turbine*al.getNumberOfNodesPerBlade()); } , py::arg("turbine"))
+        .def("get_turbine_blade_coords_x", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsX() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_y", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsY() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_z", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsZ() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_velocities_x", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesX() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_velocities_y", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesY() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_velocities_z", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesZ() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_x", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesX() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_y", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesY() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_z", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesZ() + turbine*al.getNumberOfBladesPerTurbine()*al.getNumberOfNodesPerBlade()); }, py::arg("turbine") )
+        // Addresses returned by the *Device getters, exposed as plain integers (presumably CUDA device memory - confirm; intended for GPU-side consumers, not host reads).
+        .def("get_all_blade_radii_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getAllBladeRadiiDevice()); } )
+        .def("get_all_blade_coords_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsXDevice()); } )
+        .def("get_all_blade_coords_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsYDevice()); } )
+        .def("get_all_blade_coords_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsZDevice()); } )        
+        .def("get_all_blade_velocities_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesXDevice()); } )
+        .def("get_all_blade_velocities_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesYDevice()); } )
+        .def("get_all_blade_velocities_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesZDevice()); } )
+        .def("get_all_blade_forces_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesXDevice()); } )
+        .def("get_all_blade_forces_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesYDevice()); } )
+        .def("get_all_blade_forces_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesZDevice()); } )
+        // Per-turbine variants of the integer addresses above.
+        .def("get_turbine_blade_radii_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeRadiiDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsXDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsYDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_coords_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsZDevice(turbine)); }, py::arg("turbine") )        
+        .def("get_turbine_blade_velocities_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesXDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_velocities_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesYDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_velocities_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesZDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesXDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesYDevice(turbine)); }, py::arg("turbine") )
+        .def("get_turbine_blade_forces_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesZDevice(turbine)); }, py::arg("turbine") )
+        // Setters accept C-contiguous float arrays and forward the raw data pointer; the array length is not checked here.
+        .def("set_all_azimuths", [](ActuatorFarm& al, arr azimuths){ al.setAllAzimuths(static_cast<float *>(azimuths.request().ptr)); }, py::arg("azimuths"))
+        .def("set_all_yaws", [](ActuatorFarm& al, arr yaws){ al.setAllYaws(static_cast<float *>(yaws.request().ptr)); }, py::arg("yaws"))
+        .def("set_all_omegas", [](ActuatorFarm& al, arr omegas){ al.setAllOmegas(static_cast<float *>(omegas.request().ptr)); }, py::arg("omegas"))
+
+        .def("set_turbine_azimuth", &ActuatorFarm::setTurbineAzimuth, py::arg("turbine"), py::arg("azimuth"))
+        .def("set_turbine_yaw", &ActuatorFarm::setTurbineYaw, py::arg("turbine"), py::arg("yaw"))
+        .def("set_turbine_omega", &ActuatorFarm::setTurbineOmega, py::arg("turbine"), py::arg("omega"))
+
+        .def("set_all_blade_coords", [](ActuatorFarm& al, arr coordsX, arr coordsY, arr coordsZ)
+        { 
+            al.setAllBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); 
+        }, py::arg("blade_coords_x"), py::arg("blade_coords_y"), py::arg("blade_coords_z") )
+        .def("set_all_blade_velocities", [](ActuatorFarm& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ)
+        { 
+            al.setAllBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); 
+        }, py::arg("blade_velocities_x"), py::arg("blade_velocities_y"), py::arg("blade_velocities_z") )
+        .def("set_all_blade_forces", [](ActuatorFarm& al, arr forcesX, arr forcesY, arr forcesZ)
+        { 
+            al.setAllBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr));
+        }, py::arg("blade_forces_x"), py::arg("blade_forces_y"), py::arg("blade_forces_z") )     
+        .def("set_turbine_blade_coords", [](ActuatorFarm& al, uint turbine, arr coordsX, arr coordsY, arr coordsZ)
+        { 
+            al.setTurbineBladeCoords(turbine, static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); 
+        }, py::arg("turbine"), py::arg("blade_coords_x"), py::arg("blade_coords_y"), py::arg("blade_coords_z") )
+        .def("set_turbine_blade_velocities", [](ActuatorFarm& al, uint turbine, arr velocitiesX, arr velocitiesY, arr velocitiesZ)
+        {
+            al.setTurbineBladeVelocities(turbine, static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); 
+        }, py::arg("turbine"), py::arg("blade_velocities_x"), py::arg("blade_velocities_y"), py::arg("blade_velocities_z") )
+        .def("set_turbine_blade_forces", [](ActuatorFarm& al, uint turbine, arr forcesX, arr forcesY, arr forcesZ)
+        { 
+            al.setTurbineBladeForces(turbine, static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); 
+        }, py::arg("turbine"), py::arg("blade_forces_x"), py::arg("blade_forces_y"), py::arg("blade_forces_z") )
+        .def("calc_blade_forces", &ActuatorFarm::calcBladeForces);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/actuator_line.cpp b/pythonbindings/src/gpu/submodules/actuator_line.cpp
deleted file mode 100644
index 3207fadbc37df38e53e00adcb9a86f0b8e82ba98..0000000000000000000000000000000000000000
--- a/pythonbindings/src/gpu/submodules/actuator_line.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <pybind11/numpy.h>
-#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h>
-#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
-class PyActuatorLine : public ActuatorLine 
-{
-public:
-    using ActuatorLine::ActuatorLine; // Inherit constructors
-    void calcBladeForces() override 
-    { 
-        PYBIND11_OVERRIDE_NAME(void, ActuatorLine, "calc_blade_forces", calcBladeForces,); 
-    }
-};
-namespace actuator_line
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        using arr = py::array_t<float, py::array::c_style>;
-        
-        py::class_<ActuatorLine, PreCollisionInteractor, PyActuatorLine, std::shared_ptr<ActuatorLine>>(parentModule, "ActuatorLine", py::dynamic_attr())
-        .def(py::init<  const uint,
-                        const real,
-                        const uint,
-                        const real,
-                        real, real, real,
-                        const real,
-                        int,
-                        const real,
-                        const real>(), 
-                        "n_blades", 
-                        "density", 
-                        "n_blade_nodes", 
-                        "epsilon",
-                        "turbine_pos_x", "turbine_pos_y", "turbine_pos_z", 
-                        "diameter", 
-                        "level", 
-                        "delta_t", 
-                        "delta_x")
-        .def_property("omega", &ActuatorLine::getOmega, &ActuatorLine::setOmega)
-        .def_property("azimuth", &ActuatorLine::getAzimuth, &ActuatorLine::setAzimuth)
-        .def_property("yaw", &ActuatorLine::getYaw, &ActuatorLine::setYaw)
-        .def_property_readonly("n_blades", &ActuatorLine::getNBlades)
-        .def_property_readonly("n_blade_nodes", &ActuatorLine::getNBladeNodes)
-        .def_property_readonly("n_nodes", &ActuatorLine::getNNodes)
-        .def_property_readonly("n_indices", &ActuatorLine::getNIndices)
-        .def_property_readonly("density", &ActuatorLine::getDensity)
-        .def_property_readonly("position_x", &ActuatorLine::getPositionX)
-        .def_property_readonly("position_y", &ActuatorLine::getPositionY)
-        .def_property_readonly("position_z", &ActuatorLine::getPositionZ)
-        .def_property_readonly("position", [](ActuatorLine& al){ real position[3] = {al.getPositionX(), al.getPositionY(), al.getPositionZ()}; return arr(3, position); } )
-        .def("get_radii", [](ActuatorLine& al){ return arr(al.getNBladeNodes(), al.getBladeRadii()); } )
-        .def("get_blade_coords_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsX()); } )
-        .def("get_blade_coords_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsY()); } )
-        .def("get_blade_coords_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsZ()); } )        
-        .def("get_blade_velocities_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesX()); } )
-        .def("get_blade_velocities_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesY()); } )
-        .def("get_blade_velocities_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesZ()); } )
-        .def("get_blade_forces_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesX()); } )
-        .def("get_blade_forces_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesY()); } )
-        .def("get_blade_forces_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesZ()); } )
-        .def("set_blade_coords", [](ActuatorLine& al, arr coordsX, arr coordsY, arr coordsZ){ 
-            al.setBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); } )
-        .def("set_blade_velocities", [](ActuatorLine& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ){ 
-            al.setBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); } )
-        .def("set_blade_forces", [](ActuatorLine& al, arr forcesX, arr forcesY, arr forcesZ){ 
-            al.setBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); } )
-        .def("calc_blade_forces", &ActuatorLine::calcBladeForces);
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/boundary_conditions.cpp b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp
index 8f941a8705c225275d25291205ebdaeef8de5c9e..865817bb16f7b164c40bdc066645fb2e1f1c842e 100644
--- a/pythonbindings/src/gpu/submodules/boundary_conditions.cpp
+++ b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp
@@ -1,5 +1,38 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file boundary_conditions.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <gpu/GridGenerator/grid/BoundaryConditions/Side.h>
+#include "gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 
 namespace boundary_conditions
 {
@@ -14,7 +47,59 @@ namespace boundary_conditions
         .value("PY", SideType::PY)
         .value("MZ", SideType::MZ)
         .value("PZ", SideType::PZ)
-        .value("GEOMETRY", SideType::GEOMETRY)
-        .export_values();
+        .value("GEOMETRY", SideType::GEOMETRY);
+
+        py::class_<BoundaryConditionFactory>(parentModule, "BoundaryConditionFactory")
+        .def(py::init<>())
+        .def("set_velocity_boundary_condition", &BoundaryConditionFactory::setVelocityBoundaryCondition, py::arg("boundary_condition_type"))
+        .def("set_no_slip_boundary_condition", &BoundaryConditionFactory::setNoSlipBoundaryCondition, py::arg("boundary_condition_type"))
+        .def("set_slip_boundary_condition", &BoundaryConditionFactory::setSlipBoundaryCondition, py::arg("boundary_condition_type"))
+        .def("set_pressure_boundary_condition", &BoundaryConditionFactory::setPressureBoundaryCondition, py::arg("boundary_condition_type"))
+        .def("set_stress_boundary_condition", &BoundaryConditionFactory::setStressBoundaryCondition, py::arg("boundary_condition_type"))
+        .def("set_precursor_boundary_condition", &BoundaryConditionFactory::setPrecursorBoundaryCondition, py::arg("boundary_condition_type"))
+        .def("set_geometry_boundary_condition", &BoundaryConditionFactory::setGeometryBoundaryCondition, py::arg("boundary_condition_type"));
+
+        py::enum_<BoundaryConditionFactory::VelocityBC>(parentModule, "VelocityBC")
+        .value("VelocitySimpleBounceBackCompressible", BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible)
+        .value("VelocityIncompressible", BoundaryConditionFactory::VelocityBC::VelocityIncompressible)
+        .value("VelocityCompressible", BoundaryConditionFactory::VelocityBC::VelocityCompressible)
+        .value("VelocityAndPressureCompressible", BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible)
+        .value("NotSpecified", BoundaryConditionFactory::VelocityBC::NotSpecified);
+
+
+        py::enum_<BoundaryConditionFactory::NoSlipBC>(parentModule, "NoSlipBC")
+        .value("NoSlipImplicitBounceBack", BoundaryConditionFactory::NoSlipBC::NoSlipImplicitBounceBack)
+        .value("NoSlipBounceBack", BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack)
+        .value("NoSlipIncompressible", BoundaryConditionFactory::NoSlipBC::NoSlipIncompressible)
+        .value("NoSlipCompressible", BoundaryConditionFactory::NoSlipBC::NoSlipCompressible)
+        .value("NoSlip3rdMomentsCompressible", BoundaryConditionFactory::NoSlipBC::NoSlip3rdMomentsCompressible);
+
+        py::enum_<BoundaryConditionFactory::SlipBC>(parentModule, "SlipBC")
+        .value("SlipIncompressible", BoundaryConditionFactory::SlipBC::SlipIncompressible)
+        .value("SlipCompressible", BoundaryConditionFactory::SlipBC::SlipCompressible)
+        .value("SlipBounceBack", BoundaryConditionFactory::SlipBC::SlipBounceBack)
+        .value("SlipCompressibleTurbulentViscosity", BoundaryConditionFactory::SlipBC::SlipCompressibleTurbulentViscosity)
+        .value("SlipPressureCompressibleTurbulentViscosity", BoundaryConditionFactory::SlipBC::SlipPressureCompressibleTurbulentViscosity)
+        .value("NotSpecified", BoundaryConditionFactory::SlipBC::NotSpecified);
+
+        py::enum_<BoundaryConditionFactory::PressureBC>(parentModule, "PressureBC")
+        .value("PressureEquilibrium", BoundaryConditionFactory::PressureBC::PressureEquilibrium)
+        .value("PressureEquilibrium2", BoundaryConditionFactory::PressureBC::PressureEquilibrium2)
+        .value("PressureNonEquilibriumIncompressible", BoundaryConditionFactory::PressureBC::PressureNonEquilibriumIncompressible)
+        .value("PressureNonEquilibriumCompressible", BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible)
+        .value("OutflowNonReflective", BoundaryConditionFactory::PressureBC::OutflowNonReflective)
+        .value("OutflowNonReflectivePressureCorrection", BoundaryConditionFactory::PressureBC::OutflowNonReflectivePressureCorrection)
+        .value("NotSpecified", BoundaryConditionFactory::PressureBC::NotSpecified);
+
+        py::enum_<BoundaryConditionFactory::StressBC>(parentModule, "StressBC")
+        .value("StressCompressible", BoundaryConditionFactory::StressBC::StressCompressible)
+        .value("StressBounceBack", BoundaryConditionFactory::StressBC::StressBounceBack)
+        .value("StressPressureBounceBack", BoundaryConditionFactory::StressBC::StressPressureBounceBack)
+        .value("NotSpecified", BoundaryConditionFactory::StressBC::NotSpecified);
+
+        py::enum_<BoundaryConditionFactory::PrecursorBC>(parentModule, "PrecursorBC")
+        .value("VelocityPrecursor", BoundaryConditionFactory::PrecursorBC::VelocityPrecursor)
+        .value("DistributionsPrecursor", BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor)
+        .value("NotSpecified", BoundaryConditionFactory::PrecursorBC::NotSpecified);
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/communicator.cpp b/pythonbindings/src/gpu/submodules/communicator.cpp
index edb36e2c2f774903590a16a0b406c721662827b1..26a57061933fbdbfe3447ec89eeb07116a9b974b 100644
--- a/pythonbindings/src/gpu/submodules/communicator.cpp
+++ b/pythonbindings/src/gpu/submodules/communicator.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file communicator.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <gpu/VirtualFluids_GPU/Communication/Communicator.h>
 
@@ -8,7 +40,7 @@ namespace communicator
     void makeModule(py::module_ &parentModule)
     {
         py::class_<vf::gpu::Communicator, std::unique_ptr<vf::gpu::Communicator, py::nodelete>>(parentModule, "Communicator")
-        .def("get_instance", &vf::gpu::Communicator::getInstance, py::return_value_policy::reference)
+        .def_static("get_instance", &vf::gpu::Communicator::getInstance, py::return_value_policy::reference)
         .def("get_number_of_process", &vf::gpu::Communicator::getNummberOfProcess)
         .def("get_pid", &vf::gpu::Communicator::getPID);
     }
diff --git a/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
index bf27080cb3cd050343ba42b0571827ed58870cfd..bbff4832cb73f47e3d1a5a6abd78e21da2473deb 100644
--- a/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
+++ b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file cuda_memory_manager.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
 #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
@@ -10,6 +42,6 @@ namespace cuda_memory_manager
     void makeModule(py::module_ &parentModule)
     {
         py::class_<CudaMemoryManager, std::shared_ptr<CudaMemoryManager>>(parentModule, "CudaMemoryManager")
-        .def(py::init<std::shared_ptr<Parameter>>(), "parameter");
+        .def(py::init<std::shared_ptr<Parameter>>(), py::arg("parameter"));
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp
index 579c06c4e00cae9646ced8b554d71631eeb7e793..3e9fb5655e26ffa6053a205da5a3e3f0f2ecd49f 100644
--- a/pythonbindings/src/gpu/submodules/grid_generator.cpp
+++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp
@@ -1,4 +1,37 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file grid_generator.cpp
+//! \ingroup submodules
+//! \author Henry Korb, Henrik Asmuth
+//=======================================================================================
 #include <pybind11/pybind11.h>
+#include "gpu/GridGenerator/utilities/communication.h"
 #include "gpu/GridGenerator/geometries/Object.h"
 #include "gpu/GridGenerator/geometries/BoundingBox/BoundingBox.h"
 #include "gpu/GridGenerator/geometries/Conglomerate/Conglomerate.h"
@@ -17,51 +50,63 @@ namespace grid_generator
     {  
         py::module gridGeneratorModule = parentModule.def_submodule("grid_generator");
 
+        // TODO: enable the Python binding for the CommunicationDirections enum (commented out below).
+        // py::enum_<CommunicationDirections>(gridGeneratorModule, "CommunicationDirections")
+        // .value("MX", CommunicationDirections::MX)
+        // .value("PX", CommunicationDirections::PX)
+        // .value("MY", CommunicationDirections::MY)
+        // .value("PY", CommunicationDirections::PY)
+        // .value("MZ", CommunicationDirections::MZ)
+        // .value("PZ", CommunicationDirections::PZ);
+
         py::class_<GridFactory, std::shared_ptr<GridFactory>>(gridGeneratorModule, "GridFactory")
-        .def("make", &GridFactory::make, py::return_value_policy::reference);
+        .def_static("make", &GridFactory::make, py::return_value_policy::reference);
 
-        py::class_<BoundingBox>(gridGeneratorModule, "BoundingBox")
-        .def(py::init<real, real, real, real, real, real>(),"min_x","max_x","min_y","max_y","min_z","max_z");
+        py::class_<BoundingBox, std::shared_ptr<BoundingBox>>(gridGeneratorModule, "BoundingBox")
+        .def(py::init<real, real, real, real, real, real>(), py::arg("min_x"), py::arg("max_x"), py::arg("min_y"), py::arg("max_y"), py::arg("min_z"), py::arg("max_z"));
 
         py::class_<Object, std::shared_ptr<Object>>(gridGeneratorModule, "Object");
         
         py::class_<Conglomerate, Object, std::shared_ptr<Conglomerate>>(gridGeneratorModule, "Conglomerate")
-        .def("make_shared", &Conglomerate::makeShared, py::return_value_policy::reference)
-        .def("add", &Conglomerate::add)
-        .def("subtract", &Conglomerate::subtract);
+        .def_static("make_shared", &Conglomerate::makeShared, py::return_value_policy::reference)
+        .def("add", &Conglomerate::add, py::arg("object"))
+        .def("subtract", &Conglomerate::subtract, py::arg("object"));
 
         py::class_<Cuboid, Object, std::shared_ptr<Cuboid>>(gridGeneratorModule, "Cuboid")
         .def(py::init<const double&, const double&, const double&, const double&, const double&, const double&>(),
-                        "min_x1", "min_x2", "min_x3", "max_x1", "max_x2", "max_x3");
+                        py::arg("min_x1"), py::arg("min_x2"), py::arg("min_x3"), py::arg("max_x1"), py::arg("max_x2"), py::arg("max_x3"));
 
         py::class_<Sphere, Object, std::shared_ptr<Sphere>>(gridGeneratorModule, "Sphere")
-        .def("make_shared", &Sphere::makeShared, py::return_value_policy::reference);
+        .def_static("make_shared", &Sphere::makeShared, py::return_value_policy::reference);
 
         py::class_<TriangularMesh, Object, std::shared_ptr<TriangularMesh>>(gridGeneratorModule, "TriangularMesh")
-        .def("make", &TriangularMesh::make, py::return_value_policy::reference);
+        .def_static("make", &TriangularMesh::make, py::return_value_policy::reference);
 
         py::class_<GridBuilder, std::shared_ptr<GridBuilder>>(gridGeneratorModule, "GridBuilder")
-        .def("get_number_of_grid_levels", &GridBuilder::getNumberOfGridLevels)
-        .def("get_grid", &GridBuilder::getGrid);
+        .def("get_number_of_grid_levels", &GridBuilder::getNumberOfGridLevels);
 
         py::class_<LevelGridBuilder, GridBuilder, std::shared_ptr<LevelGridBuilder>>(gridGeneratorModule, "LevelGridBuilder")
-        .def("get_grid", py::overload_cast<int, int>(&LevelGridBuilder::getGrid))
-        .def("set_slip_boundary_condition", &LevelGridBuilder::setSlipBoundaryCondition)
-        .def("set_velocity_boundary_condition", &LevelGridBuilder::setVelocityBoundaryCondition)
-        .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition)
-        .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition)
-        .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition)
-        .def("set_stress_boundary_condition", &LevelGridBuilder::setStressBoundaryCondition);
+        .def("set_slip_boundary_condition", &LevelGridBuilder::setSlipBoundaryCondition, py::arg("side_type"), py::arg("normal_x"), py::arg("normal_y"), py::arg("normal_z"))
+        .def("set_velocity_boundary_condition", &LevelGridBuilder::setVelocityBoundaryCondition, py::arg("side_type"), py::arg("vx"), py::arg("vy"), py::arg("vz"))
+        .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition, py::arg("side_type"), py::arg("rho"))
+        .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition, py::arg("periodic_x"), py::arg("periodic_y"), py::arg("periodic_z"))
+        .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition, py::arg("side_type"))
+        .def("set_precursor_boundary_condition", &LevelGridBuilder::setPrecursorBoundaryCondition, py::arg("side_type"), py::arg("file_collection"), py::arg("n_t_read"), py::arg("velocity_x")=0.0f, py::arg("velocity_y")=0.0f, py::arg("velocity_z")=0.0f, py::arg("file_level_to_grid_level_map")=std::vector<uint>())
+        .def("set_stress_boundary_condition", &LevelGridBuilder::setStressBoundaryCondition, py::arg("side_type"), py::arg("normal_x"), py::arg("normal_y"), py::arg("normal_z"), py::arg("sampling_offset"), py::arg("z0"), py::arg("dx"));
 
         py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(gridGeneratorModule, "MultipleGridBuilder")
-        .def("make_shared", &MultipleGridBuilder::makeShared, py::return_value_policy::reference)
-        .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid)
-        .def("add_grid", py::overload_cast<Object*>(&MultipleGridBuilder::addGrid))
-        .def("add_grid", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGrid))
-        .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry))
-        .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry))
+        .def_static("make_shared", &MultipleGridBuilder::makeShared, py::return_value_policy::reference, py::arg("grid_factory"))
+        .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid, py::arg("start_x"), py::arg("start_y"), py::arg("start_z"), py::arg("end_x"), py::arg("end_y"), py::arg("end_z"), py::arg("delta"))
+        .def("add_grid", py::overload_cast<Object*>(&MultipleGridBuilder::addGrid), py::arg("grid_shape"))
+        .def("add_grid", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGrid), py::arg("grid_shape"), py::arg("level_fine"))
+        .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry), py::arg("solid_object"))
+        .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry), py::arg("solid_object"), py::arg("level"))
         .def("get_number_of_levels", &MultipleGridBuilder::getNumberOfLevels)
-        .def("build_grids", &MultipleGridBuilder::buildGrids);
+        .def("build_grids", &MultipleGridBuilder::buildGrids, py::arg("lbm_or_gks"), py::arg("enable_thin_walls"))
+        .def("set_subdomain_box", &MultipleGridBuilder::setSubDomainBox, py::arg("bounding_box"))
+        .def("find_communication_indices", &MultipleGridBuilder::findCommunicationIndices)
+        .def("set_communication_process", &MultipleGridBuilder::setCommunicationProcess)
+        .def("set_number_of_layers", &MultipleGridBuilder::setNumberOfLayers, py::arg("number_of_layers_fine"), py::arg("number_of_layers_between_levels"));
 
         return gridGeneratorModule;
     }
diff --git a/pythonbindings/src/gpu/submodules/grid_provider.cpp b/pythonbindings/src/gpu/submodules/grid_provider.cpp
index 02ff273e2cd1a2022943e19c9a48a447d9dfe54b..717e9d5cd82100636a5398c09662a0895ce8fb56 100644
--- a/pythonbindings/src/gpu/submodules/grid_provider.cpp
+++ b/pythonbindings/src/gpu/submodules/grid_provider.cpp
@@ -1,8 +1,36 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file grid_provider.cpp
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include "gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
-// #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
-// #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
-// #include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
 
 namespace grid_provider
 {
@@ -11,6 +39,6 @@ namespace grid_provider
     void makeModule(py::module_ &parentModule)
     {
         py::class_<GridProvider, std::shared_ptr<GridProvider>>(parentModule, "GridProvider")
-        .def("make_grid_generator", &GridProvider::makeGridGenerator, py::return_value_policy::reference);
+        .def_static("make_grid_generator", &GridProvider::makeGridGenerator, py::return_value_policy::reference, py::arg("builder"), py::arg("para"), py::arg("cuda_memory_manager"), py::arg("communicator"));
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_scaling_factory.cpp b/pythonbindings/src/gpu/submodules/grid_scaling_factory.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a3a572875a4695871c482a4308acab4214dbb481
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/grid_scaling_factory.cpp
@@ -0,0 +1,52 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file grid_scaling_factory.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
+#include <pybind11/pybind11.h>
+#include <gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h>
+
+namespace grid_scaling_factory
+{
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        
+        py::class_<GridScalingFactory, std::shared_ptr<GridScalingFactory>>(parentModule, "GridScalingFactory")
+        .def(py::init<>())
+        .def("set_scaling_factory", &GridScalingFactory::setScalingFactory, py::arg("scaling_type"));
+
+        py::enum_<GridScalingFactory::GridScaling>(parentModule, "GridScaling")
+        .value("ScaleCompressible", GridScalingFactory::GridScaling::ScaleCompressible)
+        .value("ScaleRhoSq", GridScalingFactory::GridScaling::ScaleRhoSq)
+        .value("NotSpecified", GridScalingFactory::GridScaling::NotSpecified);
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/parameter.cpp b/pythonbindings/src/gpu/submodules/parameter.cpp
index 7b4e67f101e3928abbd4262557864ea1d0f45b02..a7c42223e6a5bfa3caa89c0879e4133fc4123ad0 100644
--- a/pythonbindings/src/gpu/submodules/parameter.cpp
+++ b/pythonbindings/src/gpu/submodules/parameter.cpp
@@ -1,10 +1,46 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file parameter.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <pybind11/functional.h>
 #include <pybind11/stl.h>
 #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
+#include "lbm/constants/NumericConstants.h"
 #include <basics/config/ConfigurationFile.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
 
+
+using namespace vf::lbm::constant;
+
 namespace parameter
 {
     namespace py = pybind11;
@@ -13,42 +49,44 @@ namespace parameter
     {
         py::class_<Parameter, std::shared_ptr<Parameter>>(parentModule, "Parameter")
         .def(py::init<
-                const vf::basics::ConfigurationFile&, 
                 int,
-                int
-                >(),
-                "config_data",
-                "number_of_processes",
-                "my_ID")
-        .def("set_forcing", &Parameter::setForcing)
-        .def("set_diff_on", &Parameter::setDiffOn)
-        .def("set_comp_on", &Parameter::setCompOn)
-        .def("set_max_level", &Parameter::setMaxLevel)
-        .def("set_t_end", &Parameter::setTEnd)
-        .def("set_t_out", &Parameter::setTOut)
-        .def("set_t_start_out", &Parameter::setTStartOut)
-        .def("set_timestep_of_coarse_level", &Parameter::setTimestepOfCoarseLevel)
-        .def("set_output_path", &Parameter::setOutputPath)
-        .def("set_output_prefix", &Parameter::setOutputPrefix)
-        .def("set_f_name", &Parameter::setFName)
-        .def("set_print_files", &Parameter::setPrintFiles)
-        .def("set_temperature_init", &Parameter::setTemperatureInit)
-        .def("set_temperature_BC", &Parameter::setTemperatureBC)
-        .def("set_viscosity", &Parameter::setViscosity)
-        .def("set_velocity", &Parameter::setVelocity)
-        .def("set_viscosity_ratio", &Parameter::setViscosityRatio)
-        .def("set_velocity_ratio", &Parameter::setVelocityRatio)
-        .def("set_density_ratio", &Parameter::setDensityRatio)
-        .def("set_devices", &Parameter::setDevices)
-        .def("set_is_body_force", &Parameter::setIsBodyForce)
-        .def("set_use_AMD", &Parameter::setUseAMD)
-        .def("set_use_Wale", &Parameter::setUseWale)
-        .def("set_SGS_constant", &Parameter::setSGSConstant)
-        .def("set_main_kernel", &Parameter::setMainKernel)
-        .def("set_AD_kernel", &Parameter::setADKernel)
-        .def("set_use_AMD", &Parameter::setUseAMD)
-        .def("set_use_Wale", &Parameter::setUseWale)
-        .def("set_SGS_constant", &Parameter::setSGSConstant)
+                int,
+                std::optional<const vf::basics::ConfigurationFile*>>(),
+                py::arg("number_of_processes"),
+                py::arg("my_ID"),
+                py::arg("config_data"))
+        .def(py::init<int, int>(),
+                py::arg("number_of_processes"),
+                py::arg("my_ID"))
+        .def(py::init<const vf::basics::ConfigurationFile*>(), py::arg("config_data"))
+        .def("set_forcing", &Parameter::setForcing, py::arg("forcing_x"), py::arg("forcing_y"), py::arg("forcing_z"))
+        .def("set_quadric_limiters", &Parameter::setQuadricLimiters, py::arg("quadric_limiter_p"), py::arg("quadric_limiter_m"), py::arg("quadric_limiter_d"))
+        .def("set_diff_on", &Parameter::setDiffOn, py::arg("is_diff"))
+        .def("set_comp_on", &Parameter::setCompOn, py::arg("is_comp"))
+        .def("set_max_level", &Parameter::setMaxLevel, py::arg("number_of_levels"))
+        .def("set_timestep_end", &Parameter::setTimestepEnd, py::arg("tend"))
+        .def("set_timestep_out", &Parameter::setTimestepOut, py::arg("tout"))
+        .def("set_timestep_start_out", &Parameter::setTimestepStartOut, py::arg("t_start_out"))
+        .def("set_timestep_of_coarse_level", &Parameter::setTimestepOfCoarseLevel, py::arg("timestep"))
+        .def("set_calc_turbulence_intensity", &Parameter::setCalcTurbulenceIntensity, py::arg("calc_velocity_and_fluctuations"))
+        .def("set_output_path", &Parameter::setOutputPath, py::arg("o_path"))
+        .def("set_output_prefix", &Parameter::setOutputPrefix, py::arg("o_prefix"))
+        .def("set_print_files", &Parameter::setPrintFiles, py::arg("print_files"))
+        .def("set_temperature_init", &Parameter::setTemperatureInit, py::arg("temp"))
+        .def("set_temperature_BC", &Parameter::setTemperatureBC, py::arg("temp_bc"))
+        .def("set_viscosity_LB", &Parameter::setViscosityLB, py::arg("viscosity"))
+        .def("set_velocity_LB", &Parameter::setVelocityLB, py::arg("velocity"))
+        .def("set_viscosity_ratio", &Parameter::setViscosityRatio, py::arg("viscosity_ratio"))
+        .def("set_velocity_ratio", &Parameter::setVelocityRatio, py::arg("velocity_ratio"))
+        .def("set_density_ratio", &Parameter::setDensityRatio, py::arg("density_ratio"))
+        .def("set_devices", &Parameter::setDevices, py::arg("devices"))
+        .def("set_max_dev", &Parameter::setMaxDev, py::arg("max_dev"))
+        .def("set_is_body_force", &Parameter::setIsBodyForce, py::arg("is_body_force"))
+        .def("set_use_streams", &Parameter::setUseStreams, py::arg("use_streams"))
+        .def("set_main_kernel", &Parameter::setMainKernel, py::arg("kernel"))
+        .def("set_AD_kernel", &Parameter::setADKernel, py::arg("ad_kernel"))
+        .def("set_has_wall_model_monitor", &Parameter::setHasWallModelMonitor, py::arg("has_wall_monitor"))
+        .def("set_outflow_pressure_correction_factor", &Parameter::setOutflowPressureCorrectionFactor, py::arg("correction_factor"))
         .def("set_initial_condition", [](Parameter &para, std::function<std::vector<float>(real, real, real)> &init_func)
         {
             para.setInitialCondition([init_func](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz)
@@ -59,9 +97,46 @@ namespace parameter
                 vy = values[2];
                 vz = values[3];
             });
-        })
-        .def("add_actuator", &Parameter::addActuator)
-        .def("add_probe", &Parameter::addProbe)
+        }, py::arg("init_func"))
+        .def("set_initial_condition_uniform", [](Parameter &para, real velocity_x, real velocity_y, real velocity_z)
+        {
+            para.setInitialCondition([velocity_x, velocity_y, velocity_z](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) // must capture values explicitly!
+            {
+                rho = c0o1;
+                vx = velocity_x;
+                vy = velocity_y;
+                vz = velocity_z;
+            });
+        }, py::arg("velocity_x"), py::arg("velocity_y"), py::arg("velocity_z"))
+        .def("set_initial_condition_log_law", [](Parameter &para, real u_star, real z0, real velocityRatio)
+        {
+            para.setInitialCondition(
+                [u_star, z0, velocityRatio](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz)
+                {
+                    coordZ = coordZ > c0o1 ? coordZ : c0o1;
+
+                    rho = c0o1;
+                    vx  = u_star/c4o10 * log(coordZ/z0+c1o1) / velocityRatio;
+                    vy = c0o1;
+                    vz = c0o1;
+                }
+            );
+        }, py::arg("u_star"), py::arg("z0"), py::arg("velocity_ratio"))
+        .def("set_initial_condition_perturbed_log_law", [](Parameter &para, real u_star, real z0, real L_x, real L_z, real H, real velocityRatio)
+        {
+            para.setInitialCondition(
+                [u_star, z0, L_x, L_z, H, velocityRatio](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz)
+                {
+                    coordZ = coordZ > c0o1 ? coordZ : c0o1;
+                    rho = c0o1;
+                    vx  = (u_star/c4o10 * log(coordZ/z0+c1o1) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) / velocityRatio; 
+                    vy  = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) / velocityRatio; 
+                    vz  = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/H)*sin(cPi*c8o1*coordZ/H)+sin(cPi*c8o1*coordX/L_x))/(pow(c1o2*L_z-coordZ, c2o1)+c1o1) / velocityRatio;
+                }
+            );
+        }, py::arg("u_star"), py::arg("z0"), py::arg("length_x"), py::arg("length_z"), py::arg("height"), py::arg("velocity_ratio"))
+        .def("add_actuator", &Parameter::addActuator, py::arg("actuator"))
+        .def("add_probe", &Parameter::addProbe, py::arg("probe"))
         .def("get_output_path", &Parameter::getOutputPath)
         .def("get_output_prefix", &Parameter::getOutputPrefix)
         .def("get_velocity", &Parameter::getVelocity)
@@ -70,11 +145,9 @@ namespace parameter
         .def("get_viscosity_ratio", &Parameter::getViscosityRatio)
         .def("get_density_ratio", &Parameter::getDensityRatio)
         .def("get_force_ratio", &Parameter::getForceRatio)
-        .def("get_use_AMD", &Parameter::getUseAMD)
-        .def("get_use_Wale", &Parameter::getUseWale)
         .def("get_SGS_constant", &Parameter::getSGSConstant)
         .def("get_is_body_force", &Parameter::getIsBodyForce)
-        .def("set_has_wall_model_monitor", &Parameter::setHasWallModelMonitor)
         ;
+
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp b/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp
index 362ee1a8ce6112cfa9543f1b254e10f3e35822a1..308f6c37aada14c8c25c69245f603274ae2f18d8 100644
--- a/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp
+++ b/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp
@@ -1,3 +1,36 @@
+
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file pre_collision_interactor.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
 
diff --git a/pythonbindings/src/gpu/submodules/precursor_writer.cpp b/pythonbindings/src/gpu/submodules/precursor_writer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..64164ef9993d7b4f22bff2390b418718f7c3208f
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/precursor_writer.cpp
@@ -0,0 +1,67 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file precursor_writer.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/numpy.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
+#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h>
+
+namespace precursor_writer
+{
+    namespace py = pybind11;
+    //! Registers the OutputVariable enum and the PrecursorWriter class on parentModule.
+    void makeModule(py::module_ &parentModule)
+    {
+        py::enum_<OutputVariable>(parentModule, "OutputVariable")
+        .value("Velocities", OutputVariable::Velocities)
+        .value("Distributions", OutputVariable::Distributions);
+        // PrecursorWriter is bound as a PreCollisionInteractor subclass held by std::shared_ptr; pybind11 needs that base class registered before this call.
+        py::class_<PrecursorWriter, PreCollisionInteractor, std::shared_ptr<PrecursorWriter>>(parentModule, "PrecursorWriter")
+        .def(py::init < std::string, // filename
+                        std::string, // output_path
+                        real, // x_pos
+                        real, real, // y_min, y_max
+                        real, real, // z_min, z_max
+                        uint, uint, // t_start_out, t_save
+                        OutputVariable, // output_variable
+                        uint>(), // max_timesteps_per_file
+                        py::arg("filename"),
+                        py::arg("output_path"), 
+                        py::arg("x_pos"),
+                        py::arg("y_min"), py::arg("y_max"),
+                        py::arg("z_min"), py::arg("z_max"),
+                        py::arg("t_start_out"), py::arg("t_save"), 
+                        py::arg("output_variable"), 
+                        py::arg("max_timesteps_per_file"));
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/probes.cpp b/pythonbindings/src/gpu/submodules/probes.cpp
index 6993d9617d870922d7ed90ed9ecbebb8a797be25..7c26958df81a60f00c9909a91f5576a5931652d4 100644
--- a/pythonbindings/src/gpu/submodules/probes.cpp
+++ b/pythonbindings/src/gpu/submodules/probes.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file probes.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h>
@@ -29,7 +61,7 @@ namespace probes
         .value("SpatioTemporalFlatness", Statistic::SpatioTemporalFlatness);
 
         py::class_<Probe, PreCollisionInteractor, std::shared_ptr<Probe>>(probeModule, "Probe")
-        .def("add_statistic", &Probe::addStatistic)
+        .def("add_statistic", &Probe::addStatistic, py::arg("variable"))
         .def("set_file_name_to_n_out", &Probe::setFileNameToNOut)
         .def("add_all_available_statistics", &Probe::addAllAvailableStatistics);
 
@@ -41,14 +73,14 @@ namespace probes
                         uint, 
                         uint,
                         uint>(), 
-                        "probe_name",
-                        "output_path"
-                        "t_start_avg",
-                        "t_avg",
-                        "t_start_out",
-                        "t_out")
-        .def("add_probe_points_from_list", &PointProbe::addProbePointsFromList)
-        .def("add_probe_points_from_x_normal_plane", &PointProbe::addProbePointsFromXNormalPlane);
+                        py::arg("probe_name"),
+                        py::arg("output_path"),
+                        py::arg("t_start_avg"),
+                        py::arg("t_avg"),
+                        py::arg("t_start_out"),
+                        py::arg("t_out"))
+        .def("add_probe_points_from_list", &PointProbe::addProbePointsFromList, py::arg("point_coords_x"), py::arg("point_coords_y"), py::arg("point_coords_z"))
+        .def("add_probe_points_from_x_normal_plane", &PointProbe::addProbePointsFromXNormalPlane, py::arg("pos_x"), py::arg("pos0_y"), py::arg("pos0_z"), py::arg("pos1_y"), py::arg("pos1_z"), py::arg("n_y"), py::arg("n_z"));
 
         py::class_<PlaneProbe, Probe, std::shared_ptr<PlaneProbe>>(probeModule, "PlaneProbe")
         .def(py::init<
@@ -58,13 +90,13 @@ namespace probes
                         uint, 
                         uint,
                         uint>(), 
-                        "probe_name",
-                        "output_path"
-                        "t_start_avg",
-                        "t_avg",
-                        "t_start_out",
-                        "t_out")
-        .def("set_probe_plane", &PlaneProbe::setProbePlane);
+                        py::arg("probe_name"),
+                        py::arg("output_path"),
+                        py::arg("t_start_avg"),
+                        py::arg("t_avg"),
+                        py::arg("t_start_out"),
+                        py::arg("t_out"))
+        .def("set_probe_plane", &PlaneProbe::setProbePlane, py::arg("pos_x"), py::arg("pos_y"), py::arg("pos_z"), py::arg("delta_x"), py::arg("delta_y"), py::arg("delta_z"));
 
         py::class_<PlanarAverageProbe, Probe, std::shared_ptr<PlanarAverageProbe>>(probeModule, "PlanarAverageProbe")
         .def(py::init<
@@ -76,14 +108,14 @@ namespace probes
                         uint,
                         uint,
                         char>(),
-                        "probe_name",
-                        "output_path",
-                        "t_start_avg",
-                        "t_start_tmp_avg",
-                        "t_avg",
-                        "t_start_out",
-                        "t_out",
-                        "plane_normal");
+                        py::arg("probe_name"),
+                        py::arg("output_path"),
+                        py::arg("t_start_avg"),
+                        py::arg("t_start_tmp_avg"),
+                        py::arg("t_avg"),
+                        py::arg("t_start_out"),
+                        py::arg("t_out"),
+                        py::arg("plane_normal"));
 
 
         py::class_<WallModelProbe, Probe, std::shared_ptr<WallModelProbe>>(probeModule, "WallModelProbe")
@@ -95,15 +127,15 @@ namespace probes
                         uint,
                         uint,
                         uint>(), 
-                        "probe_name",
-                        "output_path"
-                        "t_start_avg",
-                        "t_start_tmp_avg",
-                        "t_avg",
-                        "t_start_out",
-                        "t_out")
-        .def("set_force_output_to_stress", &WallModelProbe::setForceOutputToStress)
-        .def("set_evaluate_pressure_gradient", &WallModelProbe::setEvaluatePressureGradient);
+                        py::arg("probe_name"),
+                        py::arg("output_path"),
+                        py::arg("t_start_avg"),
+                        py::arg("t_start_tmp_avg"),
+                        py::arg("t_avg"),
+                        py::arg("t_start_out"),
+                        py::arg("t_out"))
+        .def("set_force_output_to_stress", &WallModelProbe::setForceOutputToStress, py::arg("output_stress"))
+        .def("set_evaluate_pressure_gradient", &WallModelProbe::setEvaluatePressureGradient, py::arg("eval_press_grad"));
 
         return probeModule;
     }
diff --git a/pythonbindings/src/gpu/submodules/simulation.cpp b/pythonbindings/src/gpu/submodules/simulation.cpp
index b775d604ba41530223f22738c72785b2c15348b3..d32ef272a1fd26510439dde6ab3a9438d68009a7 100644
--- a/pythonbindings/src/gpu/submodules/simulation.cpp
+++ b/pythonbindings/src/gpu/submodules/simulation.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file simulation.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <gpu/VirtualFluids_GPU/LBM/Simulation.h>
 #include <gpu/VirtualFluids_GPU/Communication/Communicator.h>
@@ -8,6 +40,9 @@
 #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h>
 #include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h>
 #include <gpu/VirtualFluids_GPU/Output/DataWriter.h>
+#include "gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
+#include "gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h"
 
 namespace simulation
 {
@@ -20,13 +55,41 @@ namespace simulation
         .def(py::init<  std::shared_ptr<Parameter>,
                         std::shared_ptr<CudaMemoryManager>,
                         vf::gpu::Communicator &,
-                        GridProvider &>(), 
-                        "parameter",
-                        "memoryManager",
-                        "communicator",
-                        "gridProvider")
+                        GridProvider &,
+                        BoundaryConditionFactory*,
+                        GridScalingFactory*>(), 
+                        py::arg("parameter"),
+                        py::arg("memoryManager"),
+                        py::arg("communicator"),
+                        py::arg("gridProvider"),
+                        py::arg("bcFactory"),
+                        py::arg("gridScalingFactory"))
+        .def(py::init<  std::shared_ptr<Parameter>,
+                        std::shared_ptr<CudaMemoryManager>,
+                        vf::gpu::Communicator &,
+                        GridProvider &,
+                        BoundaryConditionFactory*>(), 
+                        py::arg("parameter"),
+                        py::arg("memoryManager"),
+                        py::arg("communicator"),
+                        py::arg("gridProvider"),
+                        py::arg("bcFactory"))
+        .def(py::init<  std::shared_ptr<Parameter>,
+                        std::shared_ptr<CudaMemoryManager>,
+                        vf::gpu::Communicator &,
+                        GridProvider &,
+                        BoundaryConditionFactory*,
+                        std::shared_ptr<TurbulenceModelFactory>,
+                        GridScalingFactory*>(), 
+                        py::arg("parameter"),
+                        py::arg("memoryManager"),
+                        py::arg("communicator"),
+                        py::arg("gridProvider"),
+                        py::arg("bcFactory"),
+                        py::arg("tmFactory"),
+                        py::arg("gridScalingFactory"))
         .def("run", &Simulation::run)
-        .def("addKineticEnergyAnalyzer", &Simulation::addKineticEnergyAnalyzer)
-        .def("addEnstrophyAnalyzer", &Simulation::addEnstrophyAnalyzer);
+        .def("addKineticEnergyAnalyzer", &Simulation::addKineticEnergyAnalyzer, py::arg("t_analyse"))
+        .def("addEnstrophyAnalyzer", &Simulation::addEnstrophyAnalyzer, py::arg("t_analyse"));
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/transient_bc_setter.cpp b/pythonbindings/src/gpu/submodules/transient_bc_setter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..89370ef4c1b91a0c8e480e968a1df3bd4fe540ca
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/transient_bc_setter.cpp
@@ -0,0 +1,52 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file transient_bc_setter.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
+#include <pybind11/pybind11.h>
+#include <gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h>
+
+namespace transient_bc_setter
+{
+    namespace py = pybind11;
+    //! Registers the FileType enum, the create_file_collection function and the file collection classes on parentModule.
+    void makeModule(py::module_ &parentModule)
+    {
+        py::enum_<FileType>(parentModule, "FileType")
+        .value("VTK", FileType::VTK);
+        // NOTE(review): bound before FileCollection is registered below; if createFileCollection's signature mentions FileCollection, the generated docstring may show the C++ type name - confirm.
+        parentModule.def("create_file_collection", &createFileCollection, py::arg("prefix"), py::arg("type"));
+        // FileCollection is bound without any constructor, so Python code cannot instantiate it directly.
+        py::class_<FileCollection, std::shared_ptr<FileCollection>>(parentModule, "FileCollection");
+        // VTKFileCollection is exposed as a FileCollection subclass that is constructed from a prefix string.
+        py::class_<VTKFileCollection, FileCollection, std::shared_ptr<VTKFileCollection>>(parentModule, "VTKFileCollection")
+        .def(py::init <std::string>(), py::arg("prefix"));
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/turbulence_models.cpp b/pythonbindings/src/gpu/submodules/turbulence_models.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cfbb9e56127fee0cd90a482dde258d8b96389989
--- /dev/null
+++ b/pythonbindings/src/gpu/submodules/turbulence_models.cpp
@@ -0,0 +1,56 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file turbulence_models.cpp
+//! \ingroup submodules
+//! \author Henry Korb
+//=======================================================================================
+#include "pybind11/pybind11.h"
+#include "gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
+#include "gpu/VirtualFluids_GPU/LBM/LB.h"
+
+namespace turbulence_model
+{
+    namespace py = pybind11;
+    //! Registers the TurbulenceModel enum and the TurbulenceModelFactory class on parentModule.
+    void makeModule(py::module_ &parentModule)
+    {
+        py::enum_<TurbulenceModel>(parentModule, "TurbulenceModel")
+        .value("Smagorinsky", TurbulenceModel::Smagorinsky)
+        .value("AMD", TurbulenceModel::AMD)
+        .value("QR", TurbulenceModel::QR)
+        .value("None", TurbulenceModel::None) // kept for compatibility: `None` is a Python keyword, so this name is only reachable via getattr()/__members__
+        .value("NoModel", TurbulenceModel::None); // alias for the same enumerator that works with plain attribute access (TurbulenceModel.NoModel)
+
+        py::class_<TurbulenceModelFactory, std::shared_ptr<TurbulenceModelFactory>>(parentModule, "TurbulenceModelFactory")
+        .def(py::init< std::shared_ptr<Parameter>>(), py::arg("para"))
+        .def("set_turbulence_model", &TurbulenceModelFactory::setTurbulenceModel, py::arg("turbulence_model"))
+        .def("set_model_constant", &TurbulenceModelFactory::setModelConstant, py::arg("model_constant"))
+        .def("read_config_file", &TurbulenceModelFactory::readConfigFile, py::arg("config_data"));
+    }
+}
\ No newline at end of file
diff --git a/pythonbindings/src/lbm/lbm.cpp b/pythonbindings/src/lbm/lbm.cpp
index 441b9ff372f4e4513fee58c4a8a1cd78d38582dd..90fd4a71b0101469666936c89974de316e0e2b18 100644
--- a/pythonbindings/src/lbm/lbm.cpp
+++ b/pythonbindings/src/lbm/lbm.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file lbm.cpp
+//! \ingroup lbm
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 
 namespace lbm
diff --git a/pythonbindings/src/logger/logger.cpp b/pythonbindings/src/logger/logger.cpp
index 82ad3d92760ae38c0eb62b16be726e4eeaca08ac..555b502fa9a56299895de0fa6dd6cfeb66c15024 100644
--- a/pythonbindings/src/logger/logger.cpp
+++ b/pythonbindings/src/logger/logger.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file logger.cpp
+//! \ingroup logger
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <logger/Logger.h>
 
@@ -10,15 +42,15 @@ namespace logging
         py::module loggerModule = parentModule.def_submodule("logger");
 
         py::class_<vf::logging::Logger>(loggerModule, "Logger")
-        .def("initialize_logger", &vf::logging::Logger::initalizeLogger)
-        .def("change_log_path", &vf::logging::Logger::changeLogPath);
+        .def_static("initialize_logger", &vf::logging::Logger::initalizeLogger)
+        .def_static("change_log_path", &vf::logging::Logger::changeLogPath, py::arg("path"));
 
         // use f-strings (f"text {float}") in python for compounded messages
-        loggerModule.def("vf_log_trace", [](std::string arg){ VF_LOG_TRACE(arg); });        
-        loggerModule.def("vf_log_debug", [](std::string arg){ VF_LOG_DEBUG(arg); });        
-        loggerModule.def("vf_log_info", [](std::string arg){ VF_LOG_INFO(arg); });        
-        loggerModule.def("vf_log_warning", [](std::string arg){ VF_LOG_WARNING(arg); });        
-        loggerModule.def("vf_log_critical", [](std::string arg){ VF_LOG_CRITICAL(arg); });        
+        loggerModule.def("vf_log_trace", [](std::string message){ VF_LOG_TRACE(message); }, py::arg("message"));        
+        loggerModule.def("vf_log_debug", [](std::string message){ VF_LOG_DEBUG(message); }, py::arg("message"));        
+        loggerModule.def("vf_log_info", [](std::string message){ VF_LOG_INFO(message); }, py::arg("message"));        
+        loggerModule.def("vf_log_warning", [](std::string message){ VF_LOG_WARNING(message); }, py::arg("message"));        
+        loggerModule.def("vf_log_critical", [](std::string message){ VF_LOG_CRITICAL(message); }, py::arg("message"));        
 
         return loggerModule;
     }
diff --git a/pythonbindings/src/muParser.cpp b/pythonbindings/src/muParser.cpp
index 47408c2758fc92991f1be3113d78b8741215b152..eec39de0b72c21aaa924ea805414847aa9de4492 100644
--- a/pythonbindings/src/muParser.cpp
+++ b/pythonbindings/src/muParser.cpp
@@ -1,9 +1,41 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file muParser.cpp
+//! \ingroup src
+//! \author Henry Korb
+//=======================================================================================
 #include <pybind11/pybind11.h>
 #include <muParser.h>
 
 namespace py = pybind11;
 
-PYBIND11_MODULE(pymuparser, m) {
+PYBIND11_MODULE(bindings, m) {
     py::class_<mu::ParserBase>(m, "_ParserBase");
 
     py::class_<mu::Parser, mu::ParserBase>(m, "Parser")
diff --git a/regression-tests/driven_cavity_test.sh b/regression-tests/driven_cavity_test.sh
index e10a829d2680ab647ba0f66e0f2e85a70186007e..7f799facb4459ddafcd8b210a5477954af1444cb 100755
--- a/regression-tests/driven_cavity_test.sh
+++ b/regression-tests/driven_cavity_test.sh
@@ -7,8 +7,8 @@
 # build VirtualFluids accordingly to our specific test scenario.
 # in this case adding -DUSER_APPS="apps/gpu/LBM/DrivenCavity to the cmake command is not necessary, because the DrivenCavity is added to VirtualFluids by default.
 mkdir -p build
-cmake -B build --preset=gpu_make -DCMAKE_CUDA_ARCHITECTURES=75 #-DUSER_APPS="apps/gpu/LBM/DrivenCavity"
-cd build && make -j 8 && cd ..
+cmake -B build --preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 #-DUSER_APPS="apps/gpu/LBM/DrivenCavity"
+cmake --build build --parallel 8
 
 # execute VirtualFluids
 ./build/bin/DrivenCavity
@@ -21,4 +21,4 @@ PATH_TO_DIR=output/DrivenCavity
 PATH_TO_REFERENCE_DIR=regression-tests/reference_data/regression_tests/gpu/DrivenCavity_2Levels
 
 # execute fieldcompare (A more comprehensive manual can be found here https://gitlab.com/dglaeser/fieldcompare)
-fieldcompare dir $PATH_TO_DIR --reference $PATH_TO_REFERENCE_DIR --include-files "*.vtu"
\ No newline at end of file
+fieldcompare dir $PATH_TO_DIR $PATH_TO_REFERENCE_DIR --include-files "*.vtu"
diff --git a/regression-tests/driven_cavity_uniform_test.sh b/regression-tests/driven_cavity_uniform_test.sh
new file mode 100755
index 0000000000000000000000000000000000000000..95e2bab635d3a6a73fb514a1f67902083c98e5d3
--- /dev/null
+++ b/regression-tests/driven_cavity_uniform_test.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+#################################
+# Driven Cavity Uniform Regression Test
+#################################
+
+# build VirtualFluids according to our specific test scenario.
+# in this case the DrivenCavityUniform app is requested explicitly by passing -DUSER_APPS="apps/gpu/LBM/DrivenCavityUniform" to the cmake command.
+mkdir -p build
+cmake -B build --preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 -DUSER_APPS="apps/gpu/LBM/DrivenCavityUniform"
+cmake --build build --parallel 8
+
+# execute VirtualFluids
+./build/bin/DrivenCavityUniform
+
+
+# set the path to the produced data
+PATH_TO_DIR=output/DrivenCavity_uniform
+
+# set the path to the reference data.
+# `regression-tests/reference_data` is fixed; `regression_tests/gpu/DrivenCavity_uniform` must match the structure in https://github.com/irmb/test_data:
+PATH_TO_REFERENCE_DIR=regression-tests/reference_data/regression_tests/gpu/DrivenCavity_uniform
+
+# execute fieldcompare (A more comprehensive manual can be found here https://gitlab.com/dglaeser/fieldcompare)
+fieldcompare dir $PATH_TO_DIR $PATH_TO_REFERENCE_DIR --include-files "*.vtu"
diff --git a/regression-tests/multigpu_test/rocket.yml b/regression-tests/multigpu_test/rocket.yml
new file mode 100755
index 0000000000000000000000000000000000000000..f621b1349c042e02f2e834e697147da0822ffe1f
--- /dev/null
+++ b/regression-tests/multigpu_test/rocket.yml
@@ -0,0 +1,48 @@
+host: $PHOENIX_REMOTE_HOST
+user: $PHOENIX_REMOTE_USER
+private_keyfile: $PHOENIX_PRIVATE_KEY
+
+copy:
+  - from: regression-tests/multigpu_test/slurm.job
+    to: multigpu_test/slurm.job
+    overwrite: true
+
+  - from: "CMake/"
+    to: "multigpu_test/CMake/"
+    overwrite: true
+
+  - from: "3rdParty/"
+    to: "multigpu_test/3rdParty/"
+    overwrite: true
+
+  - from: "CMakeLists.txt"
+    to: "multigpu_test/CMakeLists.txt"
+    overwrite: true
+
+  - from: "gpu.cmake"
+    to: "multigpu_test/gpu.cmake"
+    overwrite: true
+
+  - from: "src/"
+    to: "multigpu_test/src/"
+    overwrite: true
+
+  - from: "CMakePresets.json"
+    to: "multigpu_test/CMakePresets.json"
+    overwrite: true
+
+  - from: "apps/gpu/LBM/"
+    to: "multigpu_test/apps/gpu/LBM/"
+    overwrite: true
+
+collect:
+  - from: multigpu_test/output/
+    to: output/results/
+    overwrite: true
+
+  - from: multigpu_test/slurmMultiGPU.out
+    to: output/slurmMultiGPU.out
+    overwrite: true
+
+sbatch: multigpu_test/slurm.job
+continue_if_job_fails: true
diff --git a/regression-tests/multigpu_test/slurm.job b/regression-tests/multigpu_test/slurm.job
new file mode 100755
index 0000000000000000000000000000000000000000..0ee0df46ab64bab6520f9f46fc939d5b3186fae7
--- /dev/null
+++ b/regression-tests/multigpu_test/slurm.job
@@ -0,0 +1,29 @@
+#!/bin/bash -l
+
+#SBATCH --partition=gpu01_queue
+#SBATCH --nodes=1
+#SBATCH --time=10:00:00
+#SBATCH --job-name=Cavity4GPU
+#SBATCH --ntasks-per-node=4
+#SBATCH --gres=gpu:4
+#SBATCH --output=multigpu_test/slurmMultiGPU.out
+##SBATCH --exclusive
+
+module purge 
+module load comp/ccache/4.1 # loads comp/gcc/9.3.0 
+module load mpi/openmpi/4.0.5_gcc_9.3/openmpi 
+module load cuda/11.3
+module load comp/git/2.27.0
+PATH=/home/irmb/tools/cmake-3.20.3-linux-x86_64/bin:$PATH
+
+module list
+
+cd multigpu_test
+mkdir -p build
+cd build
+cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS="apps/gpu/LBM/DrivenCavityMultiGPU"
+make -j 16
+cd ..
+mkdir -p output
+
+mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "configPhoenix4GPU.txt"
\ No newline at end of file
diff --git a/regression-tests/regression-tests.sh b/regression-tests/regression-tests.sh
index 5b7d227907594b727103be91d2382c05a07b9c6f..9f5dc8cf758b380709fcc9ad8020d1335f760f64 100755
--- a/regression-tests/regression-tests.sh
+++ b/regression-tests/regression-tests.sh
@@ -13,11 +13,10 @@ git clone https://github.com/irmb/test_data regression-tests/reference_data
 #    by cloning our meshio patch and fieldcompare into a venv
 python3 -m venv .venv
 source .venv/bin/activate
-pip install rich
-pip install git+https://github.com/soerenPeters/meshio@update-pyproject-version
-pip install git+https://gitlab.com/dglaeser/fieldcompare
+pip install fieldcompare
 
 # 3. Running the specific tests
+./regression-tests/driven_cavity_uniform_test.sh
 ./regression-tests/driven_cavity_test.sh
 
 
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..5894f9dec06953c3eeb909af96db9cb19d202d65
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,11 @@
+[metadata]
+name = pyfluids
+description = Python binding for VirtualFluids
+long_description = file: README.md
+long_description_content_type = text/markdown
+platforms = any
+url = https://git.rz.tu-bs.de/irmb/virtualfluids
+version = 0.1.0
+
+[options]
+python_requires = >=3.6
diff --git a/setup.py b/setup.py
index b26e1c13d09447d17f8e9fd6e2cd0d0671595bf3..530431b3775970b5222bc87d32bfb407363f95d6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,137 +1,72 @@
-import os
-import re
 import sys
-import platform
-import subprocess
+from pathlib import Path
+from typing import List
 
-from setuptools import setup, Extension
-from setuptools.command.build_ext import build_ext
-from setuptools.command.install import install
-from setuptools.command.develop import develop
-from distutils.version import LooseVersion
+import skbuild
 
 """
-Install python wrapper of virtual fluids
-Install GPU backend with option --GPU
-(pass to pip via --install-option="--GPU")
+Install python wrapper of Virtual Fluids
+install via python:
+    python setup.py install
+    set CMAKE Flags via -DBUILD_VF_GPU:BOOL=ON
+    CMAKE flags have to be separated by -- 
+    example: python setup.py install -- -DBUILD_VF_CPU:BOOL=ON
+or install via pip:
+    pip install .
+    for pip>21:
+        set CMAKE Flags via --config-settings "-DBUILD_VF_GPU=ON"
+        example: pip install . --config-settings="-DBUILD_VF_GPU=ON"
+        each option has to be passed in individually i.e --config-settings="-DOPT1=ON" --config-settings="-DOPT2=OFF"
+    for pip <21:
+        set CMAKE Flags via --global-option="-DBUILD_VF_GPU=ON"
+        example: pip install . --global-option="-DBUILD_VF_GPU=ON"
 """
 
-vf_cmake_args = [
-    "-DBUILD_VF_PYTHON_BINDINGS=ON",
-    "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
-    "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache",
-    "-DCMAKE_C_COMPILER_LAUNCHER=ccache",
-    "-DBUILD_SHARED_LIBS=OFF",
-    "-DBUILD_WARNINGS_AS_ERRORS=OFF"
-]
-
-vf_cpu_cmake_args = [
-    "-DBUILD_VF_DOUBLE_ACCURACY=ON",
-    "-DBUILD_VF_CPU:BOOL=ON",
-    "-DBUILD_VF_UNIT_TESTS:BOOL=ON",
-    "-DUSE_METIS=ON",
-    "-DUSE_MPI=ON"
-]
-
-vf_gpu_cmake_args = [
-    "-DBUILD_VF_DOUBLE_ACCURACY=OFF",
-    "-DBUILD_VF_GPU:BOOL=ON",
-    "-DBUILD_VF_UNIT_TESTS:BOOL=OFF",
-]
-
-GPU = False
-
-class CommandMixin:
-    user_options = [
-        ('GPU', None, 'compile pyfluids with GPU backend'),
+package_name = "pyfluids"
+target = "python_bindings"
+src_dir = "pythonbindings"
+stub_package = package_name+"-stubs"
+
+stub_dir = Path(src_dir)/stub_package
+
+
+def add_subfiles(dir_path: Path, suffix: str, root_dir: Path) -> List[str]:
+    files = []
+    for f in dir_path.iterdir():
+        if f.is_dir():
+            files.extend(add_subfiles(f, suffix, root_dir))
+        if f.is_file():
+            if f.suffix != suffix:
+                continue
+            files.append(str(f.relative_to(root_dir)))
+    return files
+
+def add_directory(dir_path: Path, suffix: str):
+    return add_subfiles(dir_path, suffix, dir_path)
+
+stub_files = add_directory(stub_dir, ".pyi")
+
+# hack to get config-args for installation with pip>21
+cmake_args = []
+if "config_args" in locals():
+    cmake_args.extend([f"{k}={v}" for k, v in locals()["config_args"].items()])
+
+cmake_args += [
+        f"-DPython3_ROOT_DIR={Path(sys.prefix)}",
+        "-DBUILD_VF_PYTHON_BINDINGS=ON",
+        "-DBUILD_SHARED_LIBS=OFF",
+        "-DBUILD_VF_DOUBLE_ACCURACY=OFF",
+        "-DBUILD_VF_UNIT_TESTS:BOOL=OFF",
+        "-DBUILD_WARNINGS_AS_ERRORS=OFF",
     ]
 
-    def initialize_options(self):
-        super().initialize_options()
-        self.GPU = False
-
-    def finalize_options(self):
-        super().finalize_options()
-
-    def run(self):
-        global GPU
-        GPU = GPU or self.GPU
-        super().run()
-
-
-class InstallCommand(CommandMixin, install):
-    user_options = getattr(install, 'user_options', []) + CommandMixin.user_options
-
-
-class DevelopCommand(CommandMixin, develop):
-    user_options = getattr(develop, 'user_options', []) + CommandMixin.user_options
-
-
-class CMakeExtension(Extension):
-    def __init__(self, name, sourcedir=''):
-        Extension.__init__(self, name, sources=[])
-        self.sourcedir = os.path.abspath(sourcedir)
-
-
-class CMakeBuild(CommandMixin, build_ext):
-    user_options = getattr(build_ext, 'user_options', []) + CommandMixin.user_options
-
-    def run(self):
-        super().run()
-        try:
-            out = subprocess.check_output(['cmake', '--version'])
-        except OSError:
-            raise RuntimeError("CMake must be installed to build the following extensions: " +
-                               ", ".join(e.name for e in self.extensions))
-
-        if platform.system() == "Windows":
-            cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
-            if cmake_version < '3.1.0':
-                raise RuntimeError("CMake >= 3.1.0 is required on Windows")
-
-        for ext in self.extensions:
-            self.build_extension(ext)
-
-    def build_extension(self, ext):
-        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
-        # required for auto-detection of auxiliary "native" libs
-        if not extdir.endswith(os.path.sep):
-            extdir += os.path.sep
-
-        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
-                      '-DPYTHON_EXECUTABLE=' + sys.executable]
-
-        cfg = 'Debug' if self.debug else 'Release'
-        build_args = ['--config', cfg]
-
-        if platform.system() == "Windows":
-            cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
-            if sys.maxsize > 2**32:
-                cmake_args += ['-A', 'x64']
-            build_args += ['--', '/m']
-        else:
-            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
-            build_args += ['--', '-j2']
-
-        cmake_args.extend(vf_cmake_args)
-        cmake_args.extend(vf_gpu_cmake_args if GPU else vf_cpu_cmake_args)
-
-        env = os.environ.copy()
-        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
-                                                              self.distribution.get_version())
-        if not os.path.exists(self.build_temp):
-            os.makedirs(self.build_temp)
-        cmake_cache_file = self.build_temp+"/CMakeCache.txt"
-        if os.path.exists(cmake_cache_file):
-            os.remove(cmake_cache_file)
-        subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
-        subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
-
-
-setup(
-    name='pyfluids',
-    version='0.0.1',
-    ext_modules=[CMakeExtension('pyfluids')],
-    cmdclass={"install": InstallCommand, "develop": DevelopCommand, "build_ext": CMakeBuild},
-    zip_safe=False,
+skbuild.setup(
+    name=package_name,
+    packages=[package_name, "pymuparser", "pyfluids-stubs"],
+    package_dir={"": src_dir},
+    cmake_args=cmake_args,
+    cmake_install_target=target,
+    package_data={  "pyfluids": ["py.typed"],
+                    "pyfluids-stubs": stub_files},
+    include_package_data=True,
 )
diff --git a/src/basics/basics/utilities/UbMath.h b/src/basics/basics/utilities/UbMath.h
index fe6b01140a4f1675335e3e4f8c9a542055881727..5ba42eb6b0c00941c72ea8370ab3fe0cef71ba98 100644
--- a/src/basics/basics/utilities/UbMath.h
+++ b/src/basics/basics/utilities/UbMath.h
@@ -413,93 +413,93 @@ inline const T &min(const T &a1, const T &a2, const T &a3, const T &a4)
 // constants
 //
 //////////////////////////////////////////////////////////////////////////
-static const double c8o27  = 8. / 27.;
-static const double c2o27  = 2. / 27.;
-static const double c1o54  = 1. / 54.;
-static const double c1o216 = 1. / 216.;
-static const double c9o2   = 9. / 2.; // 4.5
-static const double c9o4   = 9. / 4.; // 2.25
-static const double c3o9   = 3. / 9.;
-static const double c3o54  = 3. / 54.;
-static const double c3o216 = 3. / 216.;
-
-static const double c1o27 = 1. / 27.;
-
-static const double c1o72          = 1. / 72.; // 0.01388888
-static const double c1o36          = 1. / 36.; // 0.02777777
-static const double c1o48          = 1. / 48.; // 0.02083333
-static const double c1o32          = 1. / 32.; // 0.03125
-static const double c1o24          = 1. / 24.; // 0.04166666
-static const double c1o20          = 1. / 20.; // 0.05
-static const double c1o18          = 1. / 18.; // 0.05555555
-static const double c1o16          = 1. / 16.; // 0.0625
-static const double c1o12          = 1. / 12.; // 0.08333333
-static const double c1o9           = 1. / 9.;  // 0.11111111
-static const double c1o8           = 1. / 8.;  // 0.125
-static const double c1o6           = 1. / 6.;  // 0.16666666
-static const double c1o5           = 1. / 5.;  // 0.2
-static const double c1o4           = 1. / 4.;  // 0.25
-static const double c1o100         = 1. / 100.;
-static const double c5o16          = 5. / 16.;        // 0.3125
+//static const double c8o27  = 8. / 27.;
+//static const double c2o27  = 2. / 27.;
+//static const double c1o54  = 1. / 54.;
+//static const double c1o216 = 1. / 216.;
+//static const double c9o2   = 9. / 2.; // 4.5
+//static const double c9o4   = 9. / 4.; // 2.25
+//static const double c3o9   = 3. / 9.;
+//static const double c3o54  = 3. / 54.;
+//static const double c3o216 = 3. / 216.;
+//
+//static const double c1o27 = 1. / 27.;
+//
+//static const double c1o72          = 1. / 72.; // 0.01388888
+//static const double c1o36          = 1. / 36.; // 0.02777777
+//static const double c1o48          = 1. / 48.; // 0.02083333
+//static const double c1o32          = 1. / 32.; // 0.03125
+//static const double c1o24          = 1. / 24.; // 0.04166666
+//static const double c1o20          = 1. / 20.; // 0.05
+//static const double c1o18          = 1. / 18.; // 0.05555555
+//static const double c1o16          = 1. / 16.; // 0.0625
+//static const double c1o12          = 1. / 12.; // 0.08333333
+//static const double c1o9           = 1. / 9.;  // 0.11111111
+//static const double c1o8           = 1. / 8.;  // 0.125
+//static const double c1o6           = 1. / 6.;  // 0.16666666
+//static const double c1o5           = 1. / 5.;  // 0.2
+//static const double c1o4           = 1. / 4.;  // 0.25
+//static const double c1o100         = 1. / 100.;
+//static const double c5o16          = 5. / 16.;        // 0.3125
 static const double c1o3           = 1. / 3.;         // 0.33333333
-static const double c3o8           = 3. / 8.;         // 0.375
-static const double c4o9           = 4. / 9.;         // 0.44444444
-static const double c1o2           = 1. / 2.;         // 0.5
-static const double c9o16          = 9. / 16.;        // 0.5625
-static const double c2o3           = 2. / 3.;         // 0.66666666
-static const double c3o4           = 3. / 4.;         // 0.75
-static const double c3o2           = 3. / 2.;         // 1.5
-static const double c4o3           = 4. / 3.;         // 1.33333333
-static const double c5o3           = 5. / 3.;         // 1.66666666
-static const double c9o5           = 9. / 5.;         // 1.8
-static const double c2o9           = 2. / 9.;         // 0.22222222
-static const double one_over_sqrt2 = 1.0 / sqrt(2.0); // 0.707106781
-static const double one_over_sqrt3 = 1.0 / sqrt(3.0); // 0.577350269
-static const double sqrt2          = sqrt(2.0);       // 1.4142135
-static const double sqrt3          = sqrt(3.0);       // 1.7320508
-static const double zeroReal       = 0.0;
-static const double c1             = 1.0;
-static const double c2             = 2.0;
-static const double c3             = 3.0;
-static const double c4             = 4.0;
-static const double c5             = 5.0;
-static const double c6             = 6.0;
-static const double c7             = 7.0;
-static const double c8             = 8.0;
-static const double c9             = 9.0;
-static const double c14            = 14.0;
-static const double c15            = 15.0;
-static const double c16            = 16.0;
-static const double c18            = 18.0;
-static const double c21            = 21.0;
-static const double c24            = 24.0;
-static const double c28            = 28.0;
-static const double c29            = 29.0;
-static const double c36            = 36.0;
-static const double c48            = 48.0;
-static const double c50            = 50.0;
-static const double c56            = 56.0;
-static const double c152           = 152.0;
-static const double c130           = 130.0;
-static const double one            = 1.0;
-static const double two            = 2.0;
-static const double three          = 3.0;
-static const double four           = 4.0;
-static const double five           = 5.0;
-static const double six            = 6.0;
-static const double seven          = 7.0;
-static const double eight          = 8.0;
-static const double nine           = 9.0;
-static const double fourteen       = 14.0;
-static const double fiveteen       = 15.0;
-static const double sixteen        = 16.0;
-static const double twentyone      = 21.0;
-static const double twentyfour     = 24.0;
-static const double twentyeight    = 28.0;
-static const double twentynine     = 29.0;
-static const double fourtyeight    = 48.0;
-static const double fifty          = 50.0;
-static const double fiftysix       = 56.0;
+//static const double c3o8           = 3. / 8.;         // 0.375
+//static const double c4o9           = 4. / 9.;         // 0.44444444
+//static const double c1o2           = 1. / 2.;         // 0.5
+//static const double c9o16          = 9. / 16.;        // 0.5625
+//static const double c2o3           = 2. / 3.;         // 0.66666666
+//static const double c3o4           = 3. / 4.;         // 0.75
+//static const double c3o2           = 3. / 2.;         // 1.5
+//static const double c4o3           = 4. / 3.;         // 1.33333333
+//static const double c5o3           = 5. / 3.;         // 1.66666666
+//static const double c9o5           = 9. / 5.;         // 1.8
+//static const double c2o9           = 2. / 9.;         // 0.22222222
+//static const double one_over_sqrt2 = 1.0 / sqrt(2.0); // 0.707106781
+//static const double one_over_sqrt3 = 1.0 / sqrt(3.0); // 0.577350269
+//static const double sqrt2          = sqrt(2.0);       // 1.4142135
+//static const double sqrt3          = sqrt(3.0);       // 1.7320508
+//static const double zeroReal       = 0.0;
+//static const double c1             = 1.0;
+//static const double c2             = 2.0;
+//static const double c3             = 3.0;
+//static const double c4             = 4.0;
+//static const double c5             = 5.0;
+//static const double c6             = 6.0;
+//static const double c7             = 7.0;
+//static const double c8             = 8.0;
+//static const double c9             = 9.0;
+//static const double c14            = 14.0;
+//static const double c15            = 15.0;
+//static const double c16            = 16.0;
+//static const double c18            = 18.0;
+//static const double c21            = 21.0;
+//static const double c24            = 24.0;
+//static const double c28            = 28.0;
+//static const double c29            = 29.0;
+//static const double c36            = 36.0;
+//static const double c48            = 48.0;
+//static const double c50            = 50.0;
+//static const double c56            = 56.0;
+//static const double c152           = 152.0;
+//static const double c130           = 130.0;
+//static const double one            = 1.0;
+//static const double two            = 2.0;
+//static const double three          = 3.0;
+//static const double four           = 4.0;
+//static const double five           = 5.0;
+//static const double six            = 6.0;
+//static const double seven          = 7.0;
+//static const double eight          = 8.0;
+//static const double nine           = 9.0;
+//static const double fourteen       = 14.0;
+//static const double fiveteen       = 15.0;
+//static const double sixteen        = 16.0;
+//static const double twentyone      = 21.0;
+//static const double twentyfour     = 24.0;
+//static const double twentyeight    = 28.0;
+//static const double twentynine     = 29.0;
+//static const double fourtyeight    = 48.0;
+//static const double fifty          = 50.0;
+//static const double fiftysix       = 56.0;
 
 } // namespace UbMath
 
diff --git a/src/basics/basics/utilities/UbTuple.h b/src/basics/basics/utilities/UbTuple.h
index fe9c787cead38621beafab3d082122277bdcff73..228ab48898e5e61777d2fcc0061eb6f0434d5cad 100644
--- a/src/basics/basics/utilities/UbTuple.h
+++ b/src/basics/basics/utilities/UbTuple.h
@@ -597,6 +597,8 @@ inline UbTuple<T1, T2, T3, T4, T5, T6, T7, T8> makeUbTuple(T1 const &a1, T2 cons
 // some typedefs
 using UbTupleFloat2        = UbTuple<float, float>;
 using UbTupleFloat3        = UbTuple<float, float, float>;
+using UbTupleFloat4        = UbTuple<float, float, float, float>;
+using UbTupleFloat6        = UbTuple<float, float, float,float, float, float>;
 using UbTupleInt2          = UbTuple<int, int>;
 using UbTupleInt3          = UbTuple<int, int, int>;
 using UbTupleInt4          = UbTuple<int, int, int, int>;
diff --git a/src/basics/basics/writer/WbWriter.h b/src/basics/basics/writer/WbWriter.h
index 26d43464c03311a2cbc14cd4fc9fe717d4b01531..55dceb7cb4a64dc90f0677796cab52135b726f56 100644
--- a/src/basics/basics/writer/WbWriter.h
+++ b/src/basics/basics/writer/WbWriter.h
@@ -88,7 +88,12 @@ public:
     {
         throw UbException(UB_EXARGS, "not implemented for " + (std::string) typeid(*this).name());
     }
-
+    virtual std::string writeLinesWithLineData(const std::string & /*filename*/, std::vector<UbTupleFloat3> & /*nodes*/,
+                                               std::vector<UbTupleInt2> & /*lines*/, std::vector<std::string> & /*datanames*/,
+                                               std::vector<std::vector<float>> & /*celldata*/)
+    {
+        throw UbException(UB_EXARGS, "not implemented for " + (std::string) typeid(*this).name());
+    }
     //////////////////////////////////////////////////////////////////////////
     // triangles
     // cell numbering:
diff --git a/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp b/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp
index 6731fa56026ca284ad671cb6ce59000a609bbb8c..55c3541983ea4248512508146792832a34a1c563 100644
--- a/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp
+++ b/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp
@@ -34,6 +34,8 @@
 #include <basics/writer/WbWriterVtkXmlASCII.h>
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 #include <cstring>
+#include <fstream>
+#include <string>
 
 using namespace std;
 
@@ -154,12 +156,13 @@ string WbWriterVtkXmlBinary::writeParallelFile(const string &filename, vector<st
 
     return vtkfilename;
 }
+
 /*===============================================================================*/
-string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFloat3> &nodes,
-                                        vector<UbTupleInt2> &lines)
+
+// helper functions
+
+ofstream createFileStream(std::string vtkfilename)
 {
-    string vtkfilename = filename + getFileExtension();
-    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - start");
 
     ofstream out(vtkfilename.c_str(), ios::out | ios::binary);
     if (!out) {
@@ -172,89 +175,199 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
         if (!out)
             throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename);
     }
+    return out;
+}
 
-    int nofNodes = (int)nodes.size();
-    int nofCells = (int)lines.size();
-
-    int bytesPerByteVal      = 4; //==sizeof(int)
-    int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 2 /*nodes per line */ * nofCells * sizeof(int);
-    int bytesCellOffsets     = 1 /*offset per line */ * nofCells * sizeof(int);
-    int bytesCellTypes       = 1 /*type of line */ * nofCells * sizeof(unsigned char);
-
-    int offset = 0;
-    // VTK FILE
+void writeVtkHeader(ofstream &out, int numberOfNodes, int numberOfCells)
+{
     out << "<?xml version=\"1.0\"?>\n";
     out << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\" >"
         << "\n";
     out << "   <UnstructuredGrid>"
         << "\n";
-    out << "      <Piece NumberOfPoints=\"" << nofNodes << "\" NumberOfCells=\"" << nofCells << "\">\n";
+    out << "      <Piece NumberOfPoints=\"" << numberOfNodes << "\" NumberOfCells=\"" << numberOfCells << "\">\n";
+}
 
-    // POINTS SECTION
+int writePointHeader(ofstream &out, int offset, int bytesPerByteVal, int bytesPoints)
+{
     out << "         <Points>\n";
     out << "            <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"appended\" offset=\"" << offset
         << "\"  />\n";
     out << "         </Points>\n";
     offset += (bytesPerByteVal + bytesPoints);
+    return offset;
+}
 
-    // CELLS SECTION
+int writeCellHeader(ofstream &out, int offset, int bytesPerByteVal, int bytesCellConnectivity, int bytesCellOffsets,
+                    int bytesCellTypes)
+{
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
     out << "            <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\"" << offset << "\" />\n ";
     offset += (bytesPerByteVal + bytesCellTypes);
     out << "         </Cells>\n";
+    return offset;
+}
 
+int writeDataHeader(ofstream &out, vector<string> &datanames, int offset, int bytesPerByteVal, int bytesScalarData)
+{
+    out << "         <CellData>\n";
+    for (size_t s = 0; s < datanames.size(); ++s) {
+        out << "            <DataArray type=\"Float32\" Name=\"" << datanames[s] << "\" format=\"appended\" offset=\""
+            << offset << "\" /> \n";
+        offset += (bytesPerByteVal + bytesScalarData);
+    }
+    out << "         </CellData>\n";
+    return offset;
+}
+
+void writeAppendDataHeader(ofstream &out)
+{
     out << "      </Piece>\n";
     out << "   </UnstructuredGrid>\n";
-
-    // AppendedData SECTION
     out << "   <AppendedData encoding=\"raw\">\n";
     out << "_";
+}
 
-    // POINTS SECTION
+void writePoints(ofstream &out, int bytesPerByteVal, int bytesPoints, vector<UbTupleFloat3> &nodes)
+{
     out.write((char *)&bytesPoints, bytesPerByteVal);
-    for (int n = 0; n < nofNodes; n++) {
+    for (int n = 0; n < (int)nodes.size(); n++) {
         out.write((char *)&val<1>(nodes[n]), sizeof(float));
         out.write((char *)&val<2>(nodes[n]), sizeof(float));
         out.write((char *)&val<3>(nodes[n]), sizeof(float));
     }
+}
 
-    // CELLS SECTION
-    // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
-    for (int c = 0; c < nofCells; c++) {
-        out.write((char *)&val<1>(lines[c]), sizeof(int));
-        out.write((char *)&val<2>(lines[c]), sizeof(int));
+void writeCellConnectivity(ofstream &out, int bytesPerByteVal, int bytesCellConnectivity, vector<UbTupleInt2> &cells)
+{
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
+    for (int c = 0; c < (int)cells.size(); c++) {
+        out.write((char *)&val<1>(cells[c]), sizeof(int));
+        out.write((char *)&val<2>(cells[c]), sizeof(int));
     }
+}
 
-    // cellOffsets
+void writeCellOffsets(ofstream &out, int bytesPerByteVal, int bytesCellOffsets, int numberOfCells)
+{
     out.write((char *)&bytesCellOffsets, bytesPerByteVal);
     int itmp;
-    for (int c = 1; c <= nofCells; c++) {
+    for (int c = 1; c <= numberOfCells; c++) {
         itmp = 2 * c;
         out.write((char *)&itmp, sizeof(int));
     }
+}
 
-    // cellTypes
+void writeCellTypes(ofstream &out, int bytesPerByteVal, int bytesCellTypes, int numberOfCells)
+{
     out.write((char *)&bytesCellTypes, bytesPerByteVal);
     unsigned char vtkCellType = 3;
-    for (int c = 0; c < nofCells; c++) {
+    for (int c = 0; c < numberOfCells; c++) {
         out.write((char *)&vtkCellType, sizeof(unsigned char));
     }
+}
+
+void writeCellData(ofstream &out, int bytesPerByteVal, int bytesScalarData, vector<string> &datanames,
+                   vector<vector<float>> &celldata)
+{
+    for (size_t s = 0; s < datanames.size(); ++s) {
+        out.write((char *)&bytesScalarData, bytesPerByteVal);
+        for (size_t d = 0; d < celldata[s].size(); ++d) {
+            // make a local copy to write from (celldata holds floats here, so the cast is a no-op)
+            float tmp = (float)celldata[s][d];
+            out.write((char *)&tmp, sizeof(float));
+        }
+    }
+}
+
+void writeEndOfFile(ofstream &out)
+{
     out << "\n</AppendedData>\n";
     out << "</VTKFile>";
     out << endl;
     out.close();
+}
+
+/*===============================================================================*/
+string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFloat3> &nodes,
+                                        vector<UbTupleInt2> &lines)
+{
+    string vtkfilename = filename + getFileExtension();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - start");
+
+    ofstream out = createFileStream(vtkfilename);
+
+    int nofNodes = (int)nodes.size();
+    int nofCells = (int)lines.size();
+
+    int bytesPerByteVal = 4; //==sizeof(int)
+    int bytesPoints = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
+    int bytesCellConnectivity = 2 /*nodes per line */ * nofCells * sizeof(int);
+    int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int);
+    int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char);
+
+    int offset = 0;
+
+    writeVtkHeader(out, nofNodes, nofCells);
+    offset = writePointHeader(out, offset, bytesPerByteVal, bytesPoints);
+    writeCellHeader(out, offset, bytesPerByteVal, bytesCellConnectivity, bytesCellOffsets, bytesCellTypes);
+    writeAppendDataHeader(out);
+
+    writePoints(out, bytesPerByteVal, bytesPoints, nodes);
+    writeCellConnectivity(out, bytesPerByteVal, bytesCellConnectivity, lines);
+    writeCellOffsets(out, bytesPerByteVal, bytesCellOffsets, nofCells);
+    writeCellTypes(out, bytesPerByteVal, bytesCellTypes, nofCells);
+    writeEndOfFile(out);
     UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - end");
 
     return vtkfilename;
 }
+
+/*===============================================================================*/
+string WbWriterVtkXmlBinary::writeLinesWithLineData(const string &filename, vector<UbTupleFloat3> &nodes,
+                                                    vector<UbTupleInt2> &lines, vector<string> &datanames,
+                                                    vector<vector<float>> &celldata)
+{
+    string vtkfilename = filename + getFileExtension();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLinesWithLineData to " << vtkfilename << " - start");
+
+    ofstream out = createFileStream(vtkfilename);
+
+    int nofNodes = (int)nodes.size();
+    int nofCells = (int)lines.size();
+
+    int bytesPerByteVal = 4; //==sizeof(int)
+    int bytesPoints = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
+    int bytesCellConnectivity = 2 /*nodes per line */ * nofCells * sizeof(int);
+    int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int);
+    int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char);
+    int bytesScalarData = 1 /*scalar        */ * nofCells * sizeof(float);
+
+    int offset = 0;
+
+    writeVtkHeader(out, nofNodes, nofCells);
+    offset = writePointHeader(out, offset, bytesPerByteVal, bytesPoints);
+    offset = writeCellHeader(out, offset, bytesPerByteVal, bytesCellConnectivity, bytesCellOffsets, bytesCellTypes);
+    writeDataHeader(out, datanames, offset, bytesPerByteVal, bytesScalarData);
+    writeAppendDataHeader(out);
+
+    writePoints(out, bytesPerByteVal, bytesPoints, nodes);
+    writeCellConnectivity(out, bytesPerByteVal, bytesCellConnectivity, lines);
+    writeCellOffsets(out, bytesPerByteVal, bytesCellOffsets, nofCells);
+    writeCellTypes(out, bytesPerByteVal, bytesCellTypes, nofCells);
+    writeCellData(out, bytesPerByteVal, bytesScalarData, datanames, celldata);
+    writeEndOfFile(out);
+
+    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLinesWithLineData to " << vtkfilename << " - end");
+
+    return vtkfilename;
+}
+
 /*===============================================================================*/
 // std::string WbWriterVtkXmlBinary::writeLinesWithNodeData(const string& filename,vector<UbTupleFloat3 >& nodes,
 // vector<UbTupleInt2 >& lines, std::vector< std::string >& datanames, std::vector< std::vector< double > >& nodedata)
@@ -276,7 +389,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
 //
 //   int bytesPerByteVal      = 4; //==sizeof(int)
 //   int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-//   int bytesCellConnectivty = 2 /*nodes per line  */ * nofCells * sizeof(int  );
+//   int bytesCellConnectivity = 2 /*nodes per line  */ * nofCells * sizeof(int  );
 //   int bytesCellOffsets     = 1 /*offset per line */ * nofCells * sizeof(int  );
 //   int bytesCellTypes       = 1 /*type of line    */ * nofCells * sizeof(unsigned char);
 //   int bytesScalarData      = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -296,7 +409,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
 //   //CELLS SECTION
 //   out<<"         <Cells>\n";
 //   out<<"            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<< offset <<"\"
-//   />\n"; offset += (bytesPerByteVal + bytesCellConnectivty); out<<"            <DataArray type=\"Int32\"
+//   />\n"; offset += (bytesPerByteVal + bytesCellConnectivity); out<<"            <DataArray type=\"Int32\"
 //   Name=\"offsets\" format=\"appended\" offset=\""<< offset <<"\" />\n"; offset += (bytesPerByteVal +
 //   bytesCellOffsets); out<<"            <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\""<<
 //   offset <<"\" />\n "; offset += (bytesPerByteVal + bytesCellTypes); out<<"         </Cells>\n";
@@ -328,7 +441,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
 //
 //   //CELLS SECTION
 //   //cellConnectivity
-//   out.write( (char*)&bytesCellConnectivty, bytesPerByteVal );
+//   out.write( (char*)&bytesCellConnectivity, bytesPerByteVal );
 //   for(int c=0; c<nofCells; c++)
 //   {
 //      out.write( (char*)&val<1>(lines[c]), sizeof(int) );
@@ -397,7 +510,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3 - coord    */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 3 /*nodes per triangle  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 3 /*nodes per triangle  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per triangle */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of triangle    */ * nofCells * sizeof(unsigned char);
 
@@ -421,7 +534,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -446,7 +559,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(triangles[c]), sizeof(int));
         out.write((char *)&val<2>(triangles[c]), sizeof(int));
@@ -502,7 +615,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename,
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 3 /*nodes per tri   */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 3 /*nodes per tri   */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per tri  */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of tri     */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -527,7 +640,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename,
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -561,7 +674,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename,
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -625,7 +738,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
 
@@ -649,7 +762,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -674,7 +787,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -730,7 +843,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -755,7 +868,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -789,7 +902,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -855,7 +968,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar          */ * nofCells * sizeof(float);
@@ -880,7 +993,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -914,7 +1027,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -984,7 +1097,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
     int bytesScalarDataPoint = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -1010,7 +1123,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1052,7 +1165,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1128,7 +1241,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3      */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per oct*/ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct   */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar        */ * nofCells * sizeof(float);
@@ -1153,7 +1266,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1187,7 +1300,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1257,7 +1370,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3      */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per oct*/ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct   */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar        */ * nofNodes * sizeof(double);
@@ -1282,7 +1395,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1316,7 +1429,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1386,7 +1499,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3      */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per oct*/ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct   */ * nofCells * sizeof(unsigned char);
     // int bytesScalarData      = 1 /*scalar        */ * nofNodes * sizeof(float);
@@ -1411,7 +1524,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1436,7 +1549,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1491,7 +1604,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 1 /*nodes per cell  */ * nofNodes * sizeof(int);
+    int bytesCellConnectivity = 1 /*nodes per cell  */ * nofNodes * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per cell */ * nofNodes * sizeof(int);
     int bytesCellTypes       = 1 /*type of line    */ * nofNodes * sizeof(unsigned char);
 
@@ -1515,7 +1628,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1540,7 +1653,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofNodes; c++)
         out.write((char *)&c, sizeof(int));
 
@@ -1586,7 +1699,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3       */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 1 /*nodes per cell */ * nofNodes * sizeof(int);
+    int bytesCellConnectivity = 1 /*nodes per cell */ * nofNodes * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per cell*/ * nofNodes * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct    */ * nofNodes * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar         */ * nofNodes * sizeof(double);
@@ -1611,7 +1724,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1645,7 +1758,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofNodes; c++)
         out.write((char *)&c, sizeof(int));
 
diff --git a/src/basics/basics/writer/WbWriterVtkXmlBinary.h b/src/basics/basics/writer/WbWriterVtkXmlBinary.h
index 421148d90497e3628ed274439c0b2fd7636b7fd2..0f2c31eda81ad0c1975c9715ac1b7fb37a06339b 100644
--- a/src/basics/basics/writer/WbWriterVtkXmlBinary.h
+++ b/src/basics/basics/writer/WbWriterVtkXmlBinary.h
@@ -93,6 +93,9 @@ public:
     // nodedata);
     // FIXME: hides function in base class
 
+    std::string writeLinesWithLineData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt2> &lines,
+                                       std::vector<std::string> &datanames, std::vector<std::vector<float>> &celldata) override;
+
     //////////////////////////////////////////////////////////////////////////
     // triangles
     //                    2
diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..798b55919df9e24dbc71ecfded5fb8a913cff8cf
--- /dev/null
+++ b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp
@@ -0,0 +1,360 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WbWriterVtkXmlImageBinary.cpp
+//! \ingroup writer
+//! \author Soeren Freudiger, Sebastian Geller, Henry Korb, Henrik Asmuth
+//=======================================================================================
+#include <basics/utilities/UbLogger.h>
+#include <basics/writer/WbWriterVtkXmlImageBinary.h>
+#include <cstring>
+
+using namespace std;
+
+/*===============================================================================*/
+const std::string WbWriterVtkXmlImageBinary::pvdEndTag = "   </Collection>\n</VTKFile>";
+/*===============================================================================*/
+//! Writes the collection file "<filename>.pvd" that lists \p filenames for the given \p timeStep.
+//! If \p sepGroups is set each file gets its own group index, otherwise its own part index.
+//! If the file cannot be opened, creating its directory and opening it again is attempted once;
+//! throws UbException if the file still cannot be opened. Returns the name of the written file.
+string WbWriterVtkXmlImageBinary::writeCollection(const string &filename, const vector<string> &filenames,
+                                                  const double &timeStep, const bool &sepGroups)
+{
+    string vtkfilename = filename + ".pvd";
+    ofstream out(vtkfilename.c_str());
+    if (!out) {
+        // reset the stream state first, otherwise the if(!out) below would keep reporting the old failure
+        out.clear();
+        string path = UbSystem::getPathFromString(vtkfilename);
+        if (path.size() > 0) {
+            UbSystem::makeDirectory(path);
+            out.open(vtkfilename.c_str());
+        }
+        if (!out)
+            throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename);
+    }
+
+    const string endian = UbSystem::isLittleEndian() ? "LittleEndian" : "BigEndian";
+    out << "<VTKFile type=\"Collection\" version=\"0.1\" byte_order=\"" << endian << "\" >" << endl;
+    out << "   <Collection>" << endl;
+
+    // exactly one of the two counters advances per file, selected by sepGroups
+    int group = 0, part = 0;
+    for (const string &name : filenames) {
+        out << "       <DataSet timestep=\"" << timeStep << "\" group=\"" << group << "\" part=\"" << part
+            << "\" file=\"" << name << "\"/>" << endl;
+        int &counter = sepGroups ? group : part;
+        ++counter;
+    }
+    out << pvdEndTag;
+    out.close();
+
+    return vtkfilename;
+}
+/*===============================================================================*/
+//! Appends \p filenames to a collection file: \p filename is tried first, then "<filename>.pvd";
+//! if neither can be opened for reading, a new collection is written via writeCollection().
+string WbWriterVtkXmlImageBinary::addFilesToCollection(const string &filename, const vector<string> &filenames,
+                                                       const double &timeStep, const bool &sepGroups)
+{
+    string vtkfilename = filename;
+    fstream test(vtkfilename.c_str(), ios::in);
+    if (!test) {
+        test.clear();
+        vtkfilename += ".pvd";
+        test.open(vtkfilename.c_str(), ios::in);
+        if (!test)
+            return this->writeCollection(filename, filenames, timeStep, sepGroups);
+    }
+
+    // start writing (length of the end tag + 1) characters before the end of the file
+    fstream out(vtkfilename.c_str(), ios::in | ios::out);
+    out.seekp(-(int)pvdEndTag.size() - 1, ios_base::end);
+    int group = 0;
+    for (size_t part = 0; part < filenames.size(); ++part) {
+        out << "       <DataSet timestep=\"" << timeStep << "\" group=\"" << group << "\" part=\"" << part << "\" file=\""
+            << filenames[part] << "\"/>" << endl;
+        group += sepGroups ? 1 : 0;
+    }
+    out << pvdEndTag;
+    return vtkfilename;
+}
+/*===============================================================================*/
+//! Writes the parallel image file "<filename>.pvti" that references the piece files of an image.
+//! \param pieceSources   file names of the pieces; pieceExtents[s] is the extent of pieceSources[s]
+//! \param pointDataNames names of the point data arrays (cellDataNames accordingly); the arrays are
+//!                       declared as Float64, the type writeData() of this class stores
+//! Throws UbException if an extent is missing for a piece or if the file cannot be opened.
+//! \return name of the written file
+string WbWriterVtkXmlImageBinary::writeParallelFile(const string &filename, const UbTupleInt6 &wholeExtent,
+                                                    const UbTupleFloat3 &origin, const UbTupleFloat3 &spacing,
+                                                    vector<string> &pieceSources, vector<UbTupleInt6> &pieceExtents,
+                                                    vector<string> &pointDataNames, vector<string> &cellDataNames)
+{
+    string vtkfilename = filename + ".pvti";
+    UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeParallelFile to " << vtkfilename << " - start");
+    if (pieceExtents.size() < pieceSources.size())
+        throw UbException(UB_EXARGS, "fewer piece extents than piece sources for " + vtkfilename);
+
+    ofstream out(vtkfilename.c_str());
+    if (!out) {
+        out.clear(); // reset the flags, otherwise the if(!out) below would still be true
+        string path = UbSystem::getPathFromString(vtkfilename);
+        if (path.size() > 0) {
+            UbSystem::makeDirectory(path);
+            out.open(vtkfilename.c_str());
+        }
+        if (!out)
+            throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename);
+    }
+
+    // announce the byte order of this machine instead of assuming little endian
+    const string endian = UbSystem::isLittleEndian() ? "LittleEndian" : "BigEndian";
+    out << "<VTKFile type=\"PImageData\" version=\"0.1\" byte_order=\"" << endian << "\">"
+        << "\n";
+    out << "  <PImageData WholeExtent=\"" << val<1>(wholeExtent) << " " << val<2>(wholeExtent) << " " << val<3>(wholeExtent) << " "
+        << val<4>(wholeExtent) << " " << val<5>(wholeExtent) << " " << val<6>(wholeExtent)
+        << "\" GhostLevel=\"0\" Origin=\"" << val<1>(origin) << " " << val<2>(origin) << " " << val<3>(origin)
+        << "\" Spacing=\"" << val<1>(spacing) << " " << val<2>(spacing) << " " << val<3>(spacing) << "\" > \n";
+
+    // the declared array type has to match the Float64 arrays of the piece files
+    out << "    <PPointData>\n";
+    for (size_t s = 0; s < pointDataNames.size(); s++)
+        out << "      <PDataArray type=\"Float64\" Name=\"" << pointDataNames[s] << "\"/>\n";
+    out << "    </PPointData>\n";
+    if (!cellDataNames.empty()) {
+        out << "    <PCellData>\n";
+        for (size_t s = 0; s < cellDataNames.size(); s++)
+            out << "      <PDataArray type=\"Float64\" Name=\"" << cellDataNames[s] << "\"/>\n";
+        out << "    </PCellData>\n";
+    }
+
+    // one <Piece> element per source file
+    for (size_t s = 0; s < pieceSources.size(); s++) {
+        const UbTupleInt6 &ext = pieceExtents[s];
+        out << "    <Piece Extent=\"" << val<1>(ext) << " " << val<2>(ext) << " " << val<3>(ext) << " "
+            << val<4>(ext) << " " << val<5>(ext) << " " << val<6>(ext) << "\" Source=\"" << pieceSources[s] << "\"/>\n";
+    }
+    out << "  </PImageData>\n";
+    out << "</VTKFile>";
+    out << endl;
+    out.close();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeParallelFile to " << vtkfilename << " - end");
+
+    return vtkfilename;
+}
+/*===============================================================================*/
+//! Writes \p celldata as cell data of an image file whose meta data is derived from \p nodes; the
+//! cells themselves are not used. Returns the name of the written file.
+string WbWriterVtkXmlImageBinary::writeOctsWithCellData(const string &filename, vector<UbTupleFloat3> &nodes,
+                                                        vector<UbTupleInt8> & /*cells*/, vector<string> &datanames,
+                                                        vector<vector<double>> &celldata)
+{
+    string vtkfilename = filename + getFileExtension();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithCellData to " << vtkfilename << " - start");
+
+    UbTupleInt6 extent;
+    UbTupleFloat3 origin, spacing;
+    getMetaDataOfImage(nodes, origin, spacing, extent);
+
+    // no point data: empty containers are handed over; the same extent serves as whole and piece extent
+    vector<string> noPointDataNames;
+    vector<vector<double>> noPointData;
+    this->writeData(vtkfilename, noPointDataNames, datanames, noPointData, celldata, extent, origin, spacing, extent);
+    UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithCellData to " << vtkfilename << " - end");
+    return vtkfilename;
+}
+/*===============================================================================*/
+//! Writes \p nodedata as point data of an image file whose meta data is derived from \p nodes; the
+//! cells themselves are not used. Returns the name of the written file.
+string WbWriterVtkXmlImageBinary::writeOctsWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes,
+                                                        vector<UbTupleUInt8> & /*cells*/, vector<string> &datanames,
+                                                        vector<vector<double>> &nodedata)
+{
+    string vtkfilename = filename + getFileExtension();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithNodeData to " << vtkfilename << " - start");
+
+    UbTupleInt6 extent;
+    UbTupleFloat3 origin, spacing;
+    getMetaDataOfImage(nodes, origin, spacing, extent);
+
+    // no cell data: empty containers are handed over; the same extent serves as whole and piece extent
+    vector<string> noCellDataNames;
+    vector<vector<double>> noCellData;
+    this->writeData(vtkfilename, datanames, noCellDataNames, nodedata, noCellData, extent, origin, spacing, extent);
+    UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithNodeData to " << vtkfilename << " - end");
+
+    return vtkfilename;
+}
+/*===============================================================================*/
+//! Writes \p nodedata as point data of an image file whose meta data is derived from \p nodes.
+//! Returns the name of the written file (\p filename plus getFileExtension()).
+string WbWriterVtkXmlImageBinary::writeNodesWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes,
+                                                         vector<string> &datanames, vector<vector<double>> &nodedata)
+{
+    string vtkfilename = filename + getFileExtension();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeNodesWithNodeData to " << vtkfilename << " - start");
+
+    UbTupleInt6 extent;
+    UbTupleFloat3 origin, spacing;
+    getMetaDataOfImage(nodes, origin, spacing, extent);
+
+    vector<string> noCellDataNames;
+    vector<vector<double>> noCellData;
+    this->writeData(vtkfilename, datanames, noCellDataNames, nodedata, noCellData, extent, origin, spacing, extent);
+    return vtkfilename;
+}
+
+//! Derives origin, spacing and index extent of an image from its node coordinates; throws UbException for
+//! fewer than two nodes. NOTE(review): presumably expects regularly spaced nodes with x running fastest - confirm.
+void WbWriterVtkXmlImageBinary::getMetaDataOfImage(vector<UbTupleFloat3> &nodes, UbTupleFloat3 &origin,
+                                                   UbTupleFloat3 &spacing, UbTupleInt6 &extent)
+{
+    if (nodes.size() < 2) // nodes[0] and nodes[1] are needed for the x-spacing
+        throw UbException(UB_EXARGS, "at least two nodes are required to derive the image meta data");
+    int nofNodes = (int)nodes.size();
+    val<1>(origin) = val<1>(nodes[0]); val<2>(origin) = val<2>(nodes[0]); val<3>(origin) = val<3>(nodes[0]);
+    float l_x = val<1>(nodes[nofNodes-1])-val<1>(origin);
+    float l_y = val<2>(nodes[nofNodes-1])-val<2>(origin);
+    // nx and ny are whole numbers of spacings; nx and nx * ny serve as node indices for the y- and z-spacing
+    val<1>(spacing) = val<1>(nodes[1])-val<1>(nodes[0]);
+    int nx = (l_x) / val<1>(spacing);
+    val<2>(spacing) = val<2>(nodes[nx])-val<2>(nodes[0]);
+    int ny = (l_y) / val<2>(spacing);
+    val<3>(spacing) = val<3>(nodes[nx*ny])-val<3>(nodes[0]);
+    // NOTE(review): a zero spacing makes the divisions here divide by zero - confirm callers never pass such nodes
+    val<1>(extent) = val<1>(origin) / val<1>(spacing); val<2>(extent) = val<1>(nodes[nofNodes - 1]) / val<1>(spacing);
+    val<3>(extent) = val<2>(origin) / val<2>(spacing); val<4>(extent) = val<2>(nodes[nofNodes - 1]) / val<2>(spacing);
+    val<5>(extent) = val<3>(origin) / val<3>(spacing); val<6>(extent) = val<3>(nodes[nofNodes - 1]) / val<3>(spacing);
+}
+
+//! Writes one VTK image data file with the given point and cell data as raw appended Float64 arrays.
+//! An existing file is overwritten; if opening fails, creating the file's directory is attempted once.
+//! \param vtkfilename                   name of the file to write
+//! \param pointDataNames, cellDataNames array names; name s belongs to nodedata[s] respectively celldata[s]
+//! \param nodedata, celldata            values of the arrays, every array is written with its own length
+//! \param wholeExtent, origin, spacing  written as attributes of the ImageData element
+//! \param extent                        written as Extent attribute of the single Piece element
+//! \param precision                     precision of the output stream
+//! Throws UbException if the file cannot be opened or if there are fewer data arrays than names.
+void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename, vector<string> &pointDataNames,
+                                          vector<string> &cellDataNames, vector<vector<double>> &nodedata,
+                                          vector<vector<double>> &celldata, UbTupleInt6 &wholeExtent,
+                                          UbTupleFloat3 &origin, UbTupleFloat3 &spacing, UbTupleInt6 &extent,
+                                          unsigned int precision)
+{
+    // every named array needs its values, otherwise the loops below would read out of bounds
+    if (nodedata.size() < pointDataNames.size() || celldata.size() < cellDataNames.size())
+        throw UbException(UB_EXARGS, "fewer data arrays than data names for " + vtkfilename);
+
+    ofstream out(vtkfilename.c_str(), ios::out | ios::binary);
+    out.precision(precision);
+
+    if (!out) {
+        out.clear(); // reset the flags, otherwise the if(!out) below would still be true
+        string path = UbSystem::getPathFromString(vtkfilename);
+        if (path.size() > 0) {
+            UbSystem::makeDirectory(path);
+            out.open(vtkfilename.c_str(), ios::out | ios::binary);
+        }
+        if (!out)
+            throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename);
+    }
+
+    const int bytesPerByteVal = 4; //==sizeof(int), size of the length field in front of each array
+
+    // The byte count is taken from each array itself: point and cell data of the same image differ in
+    // length, so a single common size would yield wrong offsets and length fields.
+    auto bytesOf = [](const vector<double> &values) { return (int)(values.size() * sizeof(double)); };
+
+    // the appended data is written in the byte order of this machine, so that order is announced
+    const string endian = UbSystem::isLittleEndian() ? "LittleEndian" : "BigEndian";
+
+    int offset = 0;
+
+    // VTK FILE
+    out << "<?xml version=\"1.0\"?>\n";
+    out << "<VTKFile type=\"ImageData\" version=\"0.1\" byte_order=\"" << endian << "\" >"
+        << "\n";
+    out << "   <ImageData WholeExtent=\"" << val<1>(wholeExtent) << " " << val<2>(wholeExtent) << " " << val<3>(wholeExtent) << " "
+        << val<4>(wholeExtent) << " " << val<5>(wholeExtent) << " " << val<6>(wholeExtent)
+        << "\" Origin=\"" << val<1>(origin) << " " << val<2>(origin) << " " << val<3>(origin)
+        << "\" Spacing=\"" << val<1>(spacing) << " " << val<2>(spacing) << " " << val<3>(spacing) << "\"> \n";
+    // the file holds a single piece
+    out << "      <Piece Extent=\"" << val<1>(extent) << " " << val<2>(extent) << " " << val<3>(extent) << " "
+        << val<4>(extent) << " " << val<5>(extent) << " " << val<6>(extent) << "\">\n";
+
+    // DATA SECTION: array declarations with their offsets into the appended data
+    if (pointDataNames.size() > 0) {
+        out << "         <PointData>\n";
+        for (size_t s = 0; s < pointDataNames.size(); ++s) {
+            out << "            <DataArray type=\"Float64\" Name=\"" << pointDataNames[s]
+                << "\" format=\"appended\" offset=\"" << offset << "\" /> \n";
+            offset += (bytesPerByteVal + bytesOf(nodedata[s]));
+        }
+        out << "         </PointData>\n";
+    }
+
+    if (cellDataNames.size() > 0) {
+        out << "         <CellData>\n";
+        for (size_t s = 0; s < cellDataNames.size(); ++s) {
+            out << "            <DataArray type=\"Float64\" Name=\"" << cellDataNames[s]
+                << "\" format=\"appended\" offset=\"" << offset << "\" /> \n";
+            offset += (bytesPerByteVal + bytesOf(celldata[s]));
+        }
+        out << "         </CellData>\n";
+    }
+
+    out << "      </Piece>\n";
+    out << "   </ImageData>\n";
+
+    // AppendedData SECTION
+    out << "   <AppendedData encoding=\"raw\">\n";
+    out << "_";
+
+    // DATA SECTION
+    // pointData: length field followed by the raw values of the array
+    for (size_t s = 0; s < pointDataNames.size(); ++s) {
+        const int bytes = bytesOf(nodedata[s]);
+        out.write((const char *)&bytes, bytesPerByteVal);
+        out.write((const char *)nodedata[s].data(), bytes);
+    }
+
+    // cellData
+    for (size_t s = 0; s < cellDataNames.size(); ++s) {
+        const int bytes = bytesOf(celldata[s]);
+        out.write((const char *)&bytes, bytesPerByteVal);
+        out.write((const char *)celldata[s].data(), bytes);
+    }
+
+    out << "\n   </AppendedData>\n";
+    out << "</VTKFile>";
+    out << endl;
+    out.close();
+}
diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h
new file mode 100644
index 0000000000000000000000000000000000000000..c41ff442732e5f65db0f1dd1ec63e5c3ffca1486
--- /dev/null
+++ b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h
@@ -0,0 +1,110 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WbWriterVtkXmlImageBinary.h
+//! \ingroup writer
+//! \author Soeren Freudiger, Sebastian Geller
+//=======================================================================================
+#ifndef WBWRITERVTKXMLIMAGEBINARY_H
+#define WBWRITERVTKXMLIMAGEBINARY_H
+
+#include <string>
+
+#include <basics/writer/WbWriter.h>
+
+#include "basics_export.h"
+
+//! Singleton writer for VTK XML image data files (".bin.vti") and the related .pvd / .pvti meta files.
+class BASICS_EXPORT WbWriterVtkXmlImageBinary : public WbWriter
+{
+public:
+    static WbWriterVtkXmlImageBinary *getInstance()
+    {
+        static WbWriterVtkXmlImageBinary instance;
+        return &instance;
+    }
+
+    WbWriterVtkXmlImageBinary(const WbWriterVtkXmlImageBinary &) = delete;
+    const WbWriterVtkXmlImageBinary &operator=(const WbWriterVtkXmlImageBinary &) = delete;
+
+private:
+    WbWriterVtkXmlImageBinary() : WbWriter()
+    {
+        if (sizeof(int) != 4)
+            throw UbException(UB_EXARGS, "machine error int   type mismatch");
+        if (sizeof(float) != 4)
+            throw UbException(UB_EXARGS, "machine error float type mismatch");
+        if (sizeof(double) != 8) // the data arrays are written as Float64
+            throw UbException(UB_EXARGS, "machine error double type mismatch");
+    }
+
+    static const std::string pvdEndTag;
+
+public:
+    std::string getFileExtension() override { return ".bin.vti"; }
+
+    // meta files: collection (.pvd) and parallel image file (.pvti)
+    std::string writeCollection(const std::string &filename, const std::vector<std::string> &filenames,
+                                const double &timestep, const bool &sepGroups);
+    std::string addFilesToCollection(const std::string &filename, const std::vector<std::string> &filenames,
+                                     const double &timestep, const bool &sepGroups);
+    std::string writeParallelFile(const std::string &filename, const UbTupleInt6 &wholeExtent, const UbTupleFloat3 &origin, const UbTupleFloat3 &spacing, 
+                                std::vector<std::string> &pieceSources, std::vector<UbTupleInt6> &pieceExtents,
+                                std::vector<std::string> &pointDataNames, std::vector<std::string> &cellDataNames);
+
+    //////////////////////////////////////////////////////////////////////////
+    // nodes
+    std::string writeNodesWithNodeData(const std::string &filename, std::vector<UbTupleFloat3> &nodes,
+                                       std::vector<std::string> &datanames,
+                                       std::vector<std::vector<double>> &nodedata) override;
+
+    //////////////////////////////////////////////////////////////////////////
+    // octs
+    //     7 ---- 6
+    //    /|     /|
+    //   4 +--- 5 |
+    //   | |    | |
+    //   | 3 ---+ 2
+    //   |/     |/
+    //   0 ---- 1
+    std::string writeOctsWithCellData(const std::string &filename, std::vector<UbTupleFloat3> &nodes,
+                                      std::vector<UbTupleInt8> &cells, std::vector<std::string> &datanames,
+                                      std::vector<std::vector<double>> &celldata) override;
+    std::string writeOctsWithNodeData(const std::string &filename, std::vector<UbTupleFloat3> &nodes,
+                                      std::vector<UbTupleUInt8> &cells, std::vector<std::string> &datanames,
+                                      std::vector<std::vector<double>> &nodedata) override;
+    //! writes one image file with the given point and cell data; precision is that of the output stream
+    void writeData(const std::string &vtkfilename, std::vector<std::string> &pointDataNames, std::vector<std::string> &cellDataNames,
+                   std::vector<std::vector<double>> &nodedata, std::vector<std::vector<double>> &celldata, UbTupleInt6 &wholeExtent,
+                   UbTupleFloat3 &origin, UbTupleFloat3 &spacing, UbTupleInt6 &extent, unsigned int precision = 6);
+
+private:
+    void getMetaDataOfImage(std::vector<UbTupleFloat3> &nodes, UbTupleFloat3& origin, UbTupleFloat3& spacing, UbTupleInt6& extent);
+};
+
+#endif // WBWRITERVTKXMLIMAGEBINARY_H
diff --git a/src/basics/config/ConfigurationFile.h b/src/basics/config/ConfigurationFile.h
index ef7e7c9f06f94cabb3ba9cbefe95c8ee75736958..4a53f7add85b9c6461fda0bab20fa6656eebc5d3 100644
--- a/src/basics/config/ConfigurationFile.h
+++ b/src/basics/config/ConfigurationFile.h
@@ -64,6 +64,10 @@ public:
    template<class T>
    T getValue(const std::string& key) const;
 
+   //! get value with key and default value
+   template<class T>
+   T getValue(const std::string& key, T defaultValue) const;
+
 private:
    //! the container
    std::map<std::string, std::string> data;
@@ -138,6 +142,19 @@ T ConfigurationFile::getValue(const std::string& key) const
    return x;
 }
 
+//! Looks up \p key and returns its value converted to T, or a fallback for a missing key.
+//! \param key          name of the entry to look up
+//! \param defaultValue value handed back if contains(key) reports no entry for \p key
+//! \return getValue<T>(key) if the key is present, otherwise \p defaultValue
+template<class T>
+T ConfigurationFile::getValue(const std::string& key, T defaultValue) const
+{
+   // only a missing key leads to the default
+   if (!contains(key))
+      return defaultValue;
+   return getValue<T>(key);
+}
+
 }
 
 #endif
diff --git a/src/basics/geometry3d/GbVoxelMatrix3D.cpp b/src/basics/geometry3d/GbVoxelMatrix3D.cpp
index c88f1d13104a5312efd161143d40e835f5654571..dd0fbfc164f951090eedccc4d9bd7d32a7a038f8 100644
--- a/src/basics/geometry3d/GbVoxelMatrix3D.cpp
+++ b/src/basics/geometry3d/GbVoxelMatrix3D.cpp
@@ -39,6 +39,7 @@
 #include <geometry3d/GbTriangle3D.h>
 
 #include <basics/utilities/UbSystem.h>
+#include "lbm/constants/NumericConstants.h"
 
 #ifdef MC_CUBES
 #include <MarchingCubes/MarchingCubes.h>
@@ -173,11 +174,11 @@ double GbVoxelMatrix3D::getIntersectionRaytraceFactor(const double &x1, const do
                                                       const double &rx1, const double &rx2, const double &rx3)
 {
     if (!((UbMath::equal(rx1, 0.0) || UbMath::equal(fabs(rx1), 1.0) ||
-           UbMath::equal(fabs(rx1), UbMath::one_over_sqrt2) || UbMath::equal(fabs(rx1), UbMath::one_over_sqrt3)) &&
+           UbMath::equal(fabs(rx1), vf::lbm::constant::one_over_sqrt2) || UbMath::equal(fabs(rx1), vf::lbm::constant::one_over_sqrt3)) &&
           (UbMath::equal(rx2, 0.0) || UbMath::equal(fabs(rx2), 1.0) ||
-           UbMath::equal(fabs(rx2), UbMath::one_over_sqrt2) || UbMath::equal(fabs(rx2), UbMath::one_over_sqrt3)) &&
+           UbMath::equal(fabs(rx2), vf::lbm::constant::one_over_sqrt2) || UbMath::equal(fabs(rx2), vf::lbm::constant::one_over_sqrt3)) &&
           (UbMath::equal(rx3, 0.0) || UbMath::equal(fabs(rx3), 1.0) ||
-           UbMath::equal(fabs(rx3), UbMath::one_over_sqrt2) || UbMath::equal(fabs(rx3), UbMath::one_over_sqrt3)))) {
+           UbMath::equal(fabs(rx3), vf::lbm::constant::one_over_sqrt2) || UbMath::equal(fabs(rx3), vf::lbm::constant::one_over_sqrt3)))) {
         throw UbException(UB_EXARGS, "nur fuer diskrete Boltzmannrichungen implementiert!!!");
     }
 
diff --git a/src/basics/tests/testUtilities.h b/src/basics/tests/testUtilities.h
index c70d9cc5c11633ded6b696d92692e3d4edf8d2ca..57606edc130b0471b957202420cb12859a9cde84 100644
--- a/src/basics/tests/testUtilities.h
+++ b/src/basics/tests/testUtilities.h
@@ -1,6 +1,8 @@
 #ifndef TESTUTILITIES_H
 #define TESTUTILITIES_H
 
+#include <gmock/gmock.h>
+
 inline auto RealEq = [](auto value) {
 #ifdef VF_DOUBLE_ACCURACY
     return testing::DoubleEq(value);
diff --git a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp
index 7e36c2b9c8acd63af35d14ebcc7029a278977a4c..2dc4ab61c602e37ff8edd6397306dad036655e8c 100644
--- a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp
+++ b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp
@@ -41,7 +41,7 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
 
 //////////////////////////////////////////////////////////////////////////
 IBcumulantK17LBMKernel::IBcumulantK17LBMKernel()
@@ -111,6 +111,7 @@ void IBcumulantK17LBMKernel::calculate(int step)
     //!
 
     using namespace std;
+    using namespace vf::lbm::constant;
 
     //initializing of forcing stuff
     if (withForcing)
@@ -225,66 +226,66 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     LBMReal fEqSolid[D3Q27System::ENDF + 1];
                     LBMReal fPre[D3Q27System::ENDF + 1];
 
-                    f[D3Q27System::DIR_000] = mfbbb;
-
-                    f[D3Q27System::DIR_P00] = mfcbb;
-                    f[D3Q27System::DIR_0P0] = mfbcb;
-                    f[D3Q27System::DIR_00P] = mfbbc;
-                    f[D3Q27System::DIR_PP0] = mfccb;
-                    f[D3Q27System::DIR_MP0] = mfacb;
-                    f[D3Q27System::DIR_P0P] = mfcbc;
-                    f[D3Q27System::DIR_M0P] = mfabc;
-                    f[D3Q27System::DIR_0PP] = mfbcc;
-                    f[D3Q27System::DIR_0MP] = mfbac;
-                    f[D3Q27System::DIR_PPP] = mfccc;
-                    f[D3Q27System::DIR_MPP] = mfacc;
-                    f[D3Q27System::DIR_PMP] = mfcac;
-                    f[D3Q27System::DIR_MMP] = mfaac;
-
-                    f[D3Q27System::DIR_M00] = mfabb;
-                    f[D3Q27System::DIR_0M0] = mfbab;
-                    f[D3Q27System::DIR_00M] = mfbba;
-                    f[D3Q27System::DIR_MM0] = mfaab;
-                    f[D3Q27System::DIR_PM0] = mfcab;
-                    f[D3Q27System::DIR_M0M] = mfaba;
-                    f[D3Q27System::DIR_P0M] = mfcba;
-                    f[D3Q27System::DIR_0MM] = mfbaa;
-                    f[D3Q27System::DIR_0PM] = mfbca;
-                    f[D3Q27System::DIR_MMM] = mfaaa;
-                    f[D3Q27System::DIR_PMM] = mfcaa;
-                    f[D3Q27System::DIR_MPM] = mfaca;
-                    f[D3Q27System::DIR_PPM] = mfcca;
+                    f[vf::lbm::dir::DIR_000] = mfbbb;
+
+                    f[vf::lbm::dir::DIR_P00] = mfcbb;
+                    f[vf::lbm::dir::DIR_0P0] = mfbcb;
+                    f[vf::lbm::dir::DIR_00P] = mfbbc;
+                    f[vf::lbm::dir::DIR_PP0] = mfccb;
+                    f[vf::lbm::dir::DIR_MP0] = mfacb;
+                    f[vf::lbm::dir::DIR_P0P] = mfcbc;
+                    f[vf::lbm::dir::DIR_M0P] = mfabc;
+                    f[vf::lbm::dir::DIR_0PP] = mfbcc;
+                    f[vf::lbm::dir::DIR_0MP] = mfbac;
+                    f[vf::lbm::dir::DIR_PPP] = mfccc;
+                    f[vf::lbm::dir::DIR_MPP] = mfacc;
+                    f[vf::lbm::dir::DIR_PMP] = mfcac;
+                    f[vf::lbm::dir::DIR_MMP] = mfaac;
+
+                    f[vf::lbm::dir::DIR_M00] = mfabb;
+                    f[vf::lbm::dir::DIR_0M0] = mfbab;
+                    f[vf::lbm::dir::DIR_00M] = mfbba;
+                    f[vf::lbm::dir::DIR_MM0] = mfaab;
+                    f[vf::lbm::dir::DIR_PM0] = mfcab;
+                    f[vf::lbm::dir::DIR_M0M] = mfaba;
+                    f[vf::lbm::dir::DIR_P0M] = mfcba;
+                    f[vf::lbm::dir::DIR_0MM] = mfbaa;
+                    f[vf::lbm::dir::DIR_0PM] = mfbca;
+                    f[vf::lbm::dir::DIR_MMM] = mfaaa;
+                    f[vf::lbm::dir::DIR_PMM] = mfcaa;
+                    f[vf::lbm::dir::DIR_MPM] = mfaca;
+                    f[vf::lbm::dir::DIR_PPM] = mfcca;
 
                     if ((*particleData)(x1, x2, x3)->solidFraction > SOLFRAC_MIN) {
-                        fPre[D3Q27System::DIR_000] = mfbbb;
-
-                        fPre[D3Q27System::DIR_P00] = mfcbb;
-                        fPre[D3Q27System::DIR_0P0] = mfbcb;
-                        fPre[D3Q27System::DIR_00P] = mfbbc;
-                        fPre[D3Q27System::DIR_PP0] = mfccb;
-                        fPre[D3Q27System::DIR_MP0] = mfacb;
-                        fPre[D3Q27System::DIR_P0P] = mfcbc;
-                        fPre[D3Q27System::DIR_M0P] = mfabc;
-                        fPre[D3Q27System::DIR_0PP] = mfbcc;
-                        fPre[D3Q27System::DIR_0MP] = mfbac;
-                        fPre[D3Q27System::DIR_PPP] = mfccc;
-                        fPre[D3Q27System::DIR_MPP] = mfacc;
-                        fPre[D3Q27System::DIR_PMP] = mfcac;
-                        fPre[D3Q27System::DIR_MMP] = mfaac;
-
-                        fPre[D3Q27System::DIR_M00] = mfabb;
-                        fPre[D3Q27System::DIR_0M0] = mfbab;
-                        fPre[D3Q27System::DIR_00M] = mfbba;
-                        fPre[D3Q27System::DIR_MM0] = mfaab;
-                        fPre[D3Q27System::DIR_PM0] = mfcab;
-                        fPre[D3Q27System::DIR_M0M] = mfaba;
-                        fPre[D3Q27System::DIR_P0M] = mfcba;
-                        fPre[D3Q27System::DIR_0MM] = mfbaa;
-                        fPre[D3Q27System::DIR_0PM] = mfbca;
-                        fPre[D3Q27System::DIR_MMM] = mfaaa;
-                        fPre[D3Q27System::DIR_PMM] = mfcaa;
-                        fPre[D3Q27System::DIR_MPM] = mfaca;
-                        fPre[D3Q27System::DIR_PPM] = mfcca;
+                        fPre[vf::lbm::dir::DIR_000] = mfbbb;
+
+                        fPre[vf::lbm::dir::DIR_P00] = mfcbb;
+                        fPre[vf::lbm::dir::DIR_0P0] = mfbcb;
+                        fPre[vf::lbm::dir::DIR_00P] = mfbbc;
+                        fPre[vf::lbm::dir::DIR_PP0] = mfccb;
+                        fPre[vf::lbm::dir::DIR_MP0] = mfacb;
+                        fPre[vf::lbm::dir::DIR_P0P] = mfcbc;
+                        fPre[vf::lbm::dir::DIR_M0P] = mfabc;
+                        fPre[vf::lbm::dir::DIR_0PP] = mfbcc;
+                        fPre[vf::lbm::dir::DIR_0MP] = mfbac;
+                        fPre[vf::lbm::dir::DIR_PPP] = mfccc;
+                        fPre[vf::lbm::dir::DIR_MPP] = mfacc;
+                        fPre[vf::lbm::dir::DIR_PMP] = mfcac;
+                        fPre[vf::lbm::dir::DIR_MMP] = mfaac;
+                          
+                        fPre[vf::lbm::dir::DIR_M00] = mfabb;
+                        fPre[vf::lbm::dir::DIR_0M0] = mfbab;
+                        fPre[vf::lbm::dir::DIR_00M] = mfbba;
+                        fPre[vf::lbm::dir::DIR_MM0] = mfaab;
+                        fPre[vf::lbm::dir::DIR_PM0] = mfcab;
+                        fPre[vf::lbm::dir::DIR_M0M] = mfaba;
+                        fPre[vf::lbm::dir::DIR_P0M] = mfcba;
+                        fPre[vf::lbm::dir::DIR_0MM] = mfbaa;
+                        fPre[vf::lbm::dir::DIR_0PM] = mfbca;
+                        fPre[vf::lbm::dir::DIR_MMM] = mfaaa;
+                        fPre[vf::lbm::dir::DIR_PMM] = mfcaa;
+                        fPre[vf::lbm::dir::DIR_MPM] = mfaca;
+                        fPre[vf::lbm::dir::DIR_PPM] = mfcca;
                     }
 
                     (*particleData)(x1, x2, x3)->hydrodynamicForce.fill(0.0);
@@ -302,8 +303,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                                     ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) +
                                    mfbbb;
 
-                    LBMReal rho   = c1 + drho;
-                    LBMReal OOrho = c1 / rho;
+                    LBMReal rho   = c1o1 + drho;
+                    LBMReal OOrho = c1o1 / rho;
                     ////////////////////////////////////////////////////////////////////////////////////
                     LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
@@ -361,39 +362,39 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and
@@ -418,24 +419,24 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     // 2.
-                    LBMReal OxxPyyPzz = c1;
+                    LBMReal OxxPyyPzz = c1o1;
                     ////////////////////////////////////////////////////////////
                     // 3.
                     LBMReal OxyyPxzz =
-                        c8 * (-c2 + omega) * (c1 + c2 * omega) / (-c8 - c14 * omega + c7 * omega * omega);
+                        c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
                     LBMReal OxyyMxzz =
-                        c8 * (-c2 + omega) * (-c7 + c4 * omega) / (c56 - c50 * omega + c9 * omega * omega);
-                    LBMReal Oxyz = c24 * (-c2 + omega) * (-c2 - c7 * omega + c3 * omega * omega) /
-                                   (c48 + c152 * omega - c130 * omega * omega + c29 * omega * omega * omega);
+                        c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
+                    LBMReal Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
+                                   (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
                     ////////////////////////////////////////////////////////////
                     // 4.
-                    LBMReal O4 = c1;
+                    LBMReal O4 = c1o1;
                     ////////////////////////////////////////////////////////////
                     // 5.
-                    LBMReal O5 = c1;
+                    LBMReal O5 = c1o1;
                     ////////////////////////////////////////////////////////////
                     // 6.
-                    LBMReal O6 = c1;
+                    LBMReal O6 = c1o1;
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114)
@@ -443,8 +444,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$\omega_2 = 1.0\f$
                     //! (modify for different bulk viscosity).
                     //!
-                    LBMReal A = (c4 + c2 * omega - c3 * omega * omega) / (c2 - c7 * omega + c5 * omega * omega);
-                    LBMReal B = (c4 + c28 * omega - c14 * omega * omega) / (c6 - c21 * omega + c15 * omega * omega);
+                    LBMReal A = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega);
+                    LBMReal B = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Compute cumulants from central moments according to Eq. (20)-(23) in
@@ -453,45 +454,36 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     // 4.
-                    LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2 * mfbba * mfbab) * OOrho;
-                    LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2 * mfbba * mfabb) * OOrho;
-                    LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2 * mfbab * mfabb) * OOrho;
+                    LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
+                    LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
+                    LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
 
-                    LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho -
+                    LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho -
                                               c1o9 * (drho * OOrho));
-                    LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho -
+                    LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho -
                                               c1o9 * (drho * OOrho));
-                    LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho -
+                    LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho -
                                               c1o9 * (drho * OOrho));
                     ////////////////////////////////////////////////////////////
                     // 5.
-                    LBMReal CUMbcc =
-                        mfbcc -
-                        ((mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) +
-                         c1o3 * (mfbca + mfbac)) *
-                            OOrho;
-                    LBMReal CUMcbc =
-                        mfcbc -
-                        ((mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) +
-                         c1o3 * (mfcba + mfabc)) *
-                            OOrho;
-                    LBMReal CUMccb =
-                        mfccb -
-                        ((mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) +
-                         c1o3 * (mfacb + mfcab)) *
-                            OOrho;
+                    LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
+                         c1o3 * (mfbca + mfbac)) * OOrho;
+                    LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
+                         c1o3 * (mfcba + mfabc)) * OOrho;
+                    LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
+                         c1o3 * (mfacb + mfcab)) * OOrho;
                     ////////////////////////////////////////////////////////////
                     // 6.
                     LBMReal CUMccc =
-                        mfccc + ((-c4 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                                  c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                                  c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
+                        mfccc + ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
+                                  c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
+                                  c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
                                      OOrho +
-                                 (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                                  c2 * (mfcaa * mfaca * mfaac) + c16 * mfbba * mfbab * mfabb) *
+                                 (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
+                                  c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
                                      OOrho * OOrho -
                                  c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                                 (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
+                                 (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
                                   (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
                                      OOrho * OOrho * c2o3 +
                                  c1o27 * ((drho * drho - drho) * OOrho * OOrho));
@@ -525,9 +517,9 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we
                     //! need rho times the gradients later.
                     //!
-                    LBMReal Dxy  = -c3 * omega * mfbba;
-                    LBMReal Dxz  = -c3 * omega * mfbab;
-                    LBMReal Dyz  = -c3 * omega * mfabb;
+                    LBMReal Dxy  = -c3o1 * omega * mfbba;
+                    LBMReal Dxz  = -c3o1 * omega * mfbab;
+                    LBMReal Dyz  = -c3o1 * omega * mfabb;
                     LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
                     LBMReal dyuy = dxux + omega * c3o2 * mxxMyy;
                     LBMReal dzuz = dxux + omega * c3o2 * mxxMzz;
@@ -537,9 +529,9 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
                     mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) -
-                                 c3 * (c1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
-                    mxxMyy += omega * (-mxxMyy) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-                    mxxMzz += omega * (-mxxMzz) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+                                 c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+                    mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+                    mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
                     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                     ////no correction
@@ -559,19 +551,19 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
                     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    wadjust = Oxyz + (c1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
+                    wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
                     mfbbb += wadjust * (-mfbbb);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
                     mxxyPyzz += wadjust * (-mxxyPyzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
                     mxxyMyzz += wadjust * (-mxxyMyzz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
                     mxxzPyyz += wadjust * (-mxxzPyyz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
                     mxxzMyyz += wadjust * (-mxxzMyyz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
                     mxyyPxzz += wadjust * (-mxyyPxzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
                     mxyyMxzz += wadjust * (-mxyyMxzz);
                     //////////////////////////////////////////////////////////////////////////
                     // no limiter
@@ -587,8 +579,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! - Compute inverse linear combinations of second and third order cumulants
                     //!
                     mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaca = c1o3 * (-c2 * mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaac = c1o3 * (mxxMyy - c2 * mxxMzz + mxxPyyPzz);
+                    mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+                    mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
                     mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
                     mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
@@ -605,12 +597,12 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! according to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et
                     //! al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    CUMacc = -O4 * (c1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1 - O4) * (CUMacc);
-                    CUMcac = -O4 * (c1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1 - O4) * (CUMcac);
-                    CUMcca = -O4 * (c1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1 - O4) * (CUMcca);
-                    CUMbbc = -O4 * (c1 / omega - c1o2) * Dxy * c1o3 * B + (c1 - O4) * (CUMbbc);
-                    CUMbcb = -O4 * (c1 / omega - c1o2) * Dxz * c1o3 * B + (c1 - O4) * (CUMbcb);
-                    CUMcbb = -O4 * (c1 / omega - c1o2) * Dyz * c1o3 * B + (c1 - O4) * (CUMcbb);
+                    CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+                    CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+                    CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+                    CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc);
+                    CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb);
+                    CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb);
 
                     //////////////////////////////////////////////////////////////////////////
                     // 5.
@@ -630,50 +622,50 @@ void IBcumulantK17LBMKernel::calculate(int step)
 
                     //////////////////////////////////////////////////////////////////////////
                     // 4.
-                    mfcbb = CUMcbb + c1o3 * ((c3 * mfcaa + c1) * mfabb + c6 * mfbba * mfbab) * OOrho;
-                    mfbcb = CUMbcb + c1o3 * ((c3 * mfaca + c1) * mfbab + c6 * mfbba * mfabb) * OOrho;
-                    mfbbc = CUMbbc + c1o3 * ((c3 * mfaac + c1) * mfbba + c6 * mfbab * mfabb) * OOrho;
+                    mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
+                    mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
+                    mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
 
-                    mfcca = CUMcca + (((mfcaa * mfaca + c2 * mfbba * mfbba) * c9 + c3 * (mfcaa + mfaca)) * OOrho -
+                    mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho -
                                       (drho * OOrho)) *
                                          c1o9;
-                    mfcac = CUMcac + (((mfcaa * mfaac + c2 * mfbab * mfbab) * c9 + c3 * (mfcaa + mfaac)) * OOrho -
+                    mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho -
                                       (drho * OOrho)) *
                                          c1o9;
-                    mfacc = CUMacc + (((mfaac * mfaca + c2 * mfabb * mfabb) * c9 + c3 * (mfaac + mfaca)) * OOrho -
+                    mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho -
                                       (drho * OOrho)) *
                                          c1o9;
 
                     //////////////////////////////////////////////////////////////////////////
                     // 5.
                     mfbcc = CUMbcc + c1o3 *
-                                         (c3 * (mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb +
-                                                c2 * (mfbab * mfacb + mfbba * mfabc)) +
+                                         (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb +
+                                                c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
                                           (mfbca + mfbac)) *
                                          OOrho;
                     mfcbc = CUMcbc + c1o3 *
-                                         (c3 * (mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb +
-                                                c2 * (mfabb * mfcab + mfbba * mfbac)) +
+                                         (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb +
+                                                c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
                                           (mfcba + mfabc)) *
                                          OOrho;
                     mfccb = CUMccb + c1o3 *
-                                         (c3 * (mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb +
-                                                c2 * (mfbab * mfbca + mfabb * mfcba)) +
+                                         (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb +
+                                                c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
                                           (mfacb + mfcab)) *
                                          OOrho;
 
                     //////////////////////////////////////////////////////////////////////////
                     // 6.
                     mfccc =
-                        CUMccc - ((-c4 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                                   c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                                   c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
+                        CUMccc - ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
+                                   c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
+                                   c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
                                       OOrho +
-                                  (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                                   c2 * (mfcaa * mfaca * mfaac) + c16 * mfbba * mfbab * mfabb) *
+                                  (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
+                                   c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
                                       OOrho * OOrho -
                                   c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                                  (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
+                                  (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
                                    (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
                                       OOrho * OOrho * c2o3 +
                                   c1o27 * ((drho * drho - drho) * OOrho * OOrho));
@@ -697,39 +689,39 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
                     ////////////////////////////////////////////////////////////////////////////////////
 
                     //////////////////////////////////////////////////////////////////////////
@@ -791,35 +783,35 @@ void IBcumulantK17LBMKernel::calculate(int step)
 
                     (*this->restDistributions)(x1, x2, x3) = mfbbb;
                     //////////////////////////////////////////////////////////////////////////
-                    f[D3Q27System::DIR_000] = mfbbb;
+                    f[vf::lbm::dir::DIR_000] = mfbbb;
                      
-                    f[D3Q27System::DIR_P00]  = mfcbb;
-                    f[D3Q27System::DIR_0P0]  = mfbcb;
-                    f[D3Q27System::DIR_00P]  = mfbbc;
-                    f[D3Q27System::DIR_PP0]  = mfccb;
-                    f[D3Q27System::DIR_MP0]  = mfacb;
-                    f[D3Q27System::DIR_P0P]  = mfcbc;
-                    f[D3Q27System::DIR_M0P]  = mfabc;
-                    f[D3Q27System::DIR_0PP]  = mfbcc;
-                    f[D3Q27System::DIR_0MP]  = mfbac;
-                    f[D3Q27System::DIR_PPP]  = mfccc;
-                    f[D3Q27System::DIR_MPP]  = mfacc;
-                    f[D3Q27System::DIR_PMP]  = mfcac;
-                    f[D3Q27System::DIR_MMP]  = mfaac;
+                    f[vf::lbm::dir::DIR_P00]  = mfcbb;
+                    f[vf::lbm::dir::DIR_0P0]  = mfbcb;
+                    f[vf::lbm::dir::DIR_00P]  = mfbbc;
+                    f[vf::lbm::dir::DIR_PP0]  = mfccb;
+                    f[vf::lbm::dir::DIR_MP0]  = mfacb;
+                    f[vf::lbm::dir::DIR_P0P]  = mfcbc;
+                    f[vf::lbm::dir::DIR_M0P]  = mfabc;
+                    f[vf::lbm::dir::DIR_0PP]  = mfbcc;
+                    f[vf::lbm::dir::DIR_0MP]  = mfbac;
+                    f[vf::lbm::dir::DIR_PPP]  = mfccc;
+                    f[vf::lbm::dir::DIR_MPP]  = mfacc;
+                    f[vf::lbm::dir::DIR_PMP]  = mfcac;
+                    f[vf::lbm::dir::DIR_MMP]  = mfaac;
                                      
-                    f[D3Q27System::DIR_M00]  = mfabb;
-                    f[D3Q27System::DIR_0M0]  = mfbab;
-                    f[D3Q27System::DIR_00M]  = mfbba;
-                    f[D3Q27System::DIR_MM0]  = mfaab;
-                    f[D3Q27System::DIR_PM0]  = mfcab;
-                    f[D3Q27System::DIR_M0M]  = mfaba;
-                    f[D3Q27System::DIR_P0M]  = mfcba;
-                    f[D3Q27System::DIR_0MM]  = mfbaa;
-                    f[D3Q27System::DIR_0PM]  = mfbca;
-                    f[D3Q27System::DIR_MMM]  = mfaaa;
-                    f[D3Q27System::DIR_PMM]  = mfcaa;
-                    f[D3Q27System::DIR_MPM]  = mfaca;
-                    f[D3Q27System::DIR_PPM]  = mfcca;
+                    f[vf::lbm::dir::DIR_M00]  = mfabb;
+                    f[vf::lbm::dir::DIR_0M0]  = mfbab;
+                    f[vf::lbm::dir::DIR_00M]  = mfbba;
+                    f[vf::lbm::dir::DIR_MM0]  = mfaab;
+                    f[vf::lbm::dir::DIR_PM0]  = mfcab;
+                    f[vf::lbm::dir::DIR_M0M]  = mfaba;
+                    f[vf::lbm::dir::DIR_P0M]  = mfcba;
+                    f[vf::lbm::dir::DIR_0MM]  = mfbaa;
+                    f[vf::lbm::dir::DIR_0PM]  = mfbca;
+                    f[vf::lbm::dir::DIR_MMM]  = mfaaa;
+                    f[vf::lbm::dir::DIR_PMM]  = mfcaa;
+                    f[vf::lbm::dir::DIR_MPM]  = mfaca;
+                    f[vf::lbm::dir::DIR_PPM]  = mfcca;
                 }
                     if ((*particleData)(x1, x2, x3)->solidFraction < SOLFRAC_MIN)
                         continue;
@@ -836,8 +828,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     D3Q27System::calcCompFeq(fEqSolid, drho, uPart[0], uPart[1], uPart[2]);
 
                     if ((*particleData)(x1, x2, x3)->solidFraction > SOLFRAC_MAX) {
-                        double const bb0     = fEq[D3Q27System::DIR_000] - fEqSolid[D3Q27System::DIR_000];
-                        f[D3Q27System::DIR_000] = fPre[D3Q27System::DIR_000] + bb0;
+                    double const bb0 = fEq[vf::lbm::dir::DIR_000] - fEqSolid[vf::lbm::dir::DIR_000];
+                    f[vf::lbm::dir::DIR_000] = fPre[vf::lbm::dir::DIR_000] + bb0;
                         for (int iPop = D3Q27System::FSTARTDIR; iPop <= D3Q27System::FENDDIR; iPop++) {
                             const int iOpp        = D3Q27System::INVDIR[iPop];
                             double const bb       = ((fPre[iOpp] - fEq[iOpp]) - (fPre[iPop] - fEqSolid[iPop]));
@@ -860,8 +852,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
 //#endif
                         double const oneMinB = 1. - B;
 
-                        double const bb0 = fEq[D3Q27System::DIR_000] - fEqSolid[D3Q27System::DIR_000];
-                        f[D3Q27System::DIR_000] = fPre[D3Q27System::DIR_000] + oneMinB * (f[D3Q27System::DIR_000] - fPre[D3Q27System::DIR_000]) + B * bb0;
+                        double const bb0 = fEq[vf::lbm::dir::DIR_000] - fEqSolid[vf::lbm::dir::DIR_000];
+                        f[vf::lbm::dir::DIR_000] = fPre[vf::lbm::dir::DIR_000] + oneMinB * (f[vf::lbm::dir::DIR_000] - fPre[vf::lbm::dir::DIR_000]) + B * bb0;
 
                         for (int iPop = D3Q27System::FSTARTDIR; iPop <= D3Q27System::FENDDIR; iPop++) {
                             int const iOpp = D3Q27System::INVDIR[iPop];
@@ -877,35 +869,35 @@ void IBcumulantK17LBMKernel::calculate(int step)
                         }
                     } /* if solidFraction > SOLFRAC_MAX */
 
-                    (*this->restDistributions)(x1, x2, x3)                             = f[D3Q27System::DIR_000];
+                    (*this->restDistributions)(x1, x2, x3)                             = f[vf::lbm::dir::DIR_000];
                                                                                           
-                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)         = f[D3Q27System::DIR_M00];
-                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)         = f[D3Q27System::DIR_0M0];
-                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)         = f[D3Q27System::DIR_00M];
-                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)        = f[D3Q27System::DIR_MM0];
-                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)       = f[D3Q27System::DIR_PM0];
-                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)        = f[D3Q27System::DIR_M0M];
-                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)       = f[D3Q27System::DIR_P0M];
-                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)        = f[D3Q27System::DIR_0MM];
-                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)       = f[D3Q27System::DIR_0PM];
-                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)       = f[D3Q27System::DIR_MMM];
-                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)      = f[D3Q27System::DIR_PMM];
-                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)      = f[D3Q27System::DIR_MPM];
-                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3)     = f[D3Q27System::DIR_PPM];
-                                                                                                          
-                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     =  f[D3Q27System::DIR_P00];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     =  f[D3Q27System::DIR_0P0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     =  f[D3Q27System::DIR_00P];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   =  f[D3Q27System::DIR_PP0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    =  f[D3Q27System::DIR_MP0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   =  f[D3Q27System::DIR_P0P];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    =  f[D3Q27System::DIR_M0P];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   =  f[D3Q27System::DIR_0PP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    =  f[D3Q27System::DIR_0MP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) =  f[D3Q27System::DIR_PPP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  =  f[D3Q27System::DIR_MPP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  =  f[D3Q27System::DIR_PMP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   =  f[D3Q27System::DIR_MMP];
+                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)         = f[vf::lbm::dir::DIR_M00];
+                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)         = f[vf::lbm::dir::DIR_0M0];
+                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)         = f[vf::lbm::dir::DIR_00M];
+                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)        = f[vf::lbm::dir::DIR_MM0];
+                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)       = f[vf::lbm::dir::DIR_PM0];
+                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)        = f[vf::lbm::dir::DIR_M0M];
+                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)       = f[vf::lbm::dir::DIR_P0M];
+                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)        = f[vf::lbm::dir::DIR_0MM];
+                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)       = f[vf::lbm::dir::DIR_0PM];
+                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)       = f[vf::lbm::dir::DIR_MMM];
+                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)      = f[vf::lbm::dir::DIR_PMM];
+                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)      = f[vf::lbm::dir::DIR_MPM];
+                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3)     = f[vf::lbm::dir::DIR_PPM];
+                                                                                              
+                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     = f[vf::lbm::dir::DIR_P00];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     = f[vf::lbm::dir::DIR_0P0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     = f[vf::lbm::dir::DIR_00P];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   = f[vf::lbm::dir::DIR_PP0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    = f[vf::lbm::dir::DIR_MP0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   = f[vf::lbm::dir::DIR_P0P];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    = f[vf::lbm::dir::DIR_M0P];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   = f[vf::lbm::dir::DIR_0PP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    = f[vf::lbm::dir::DIR_0MP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[vf::lbm::dir::DIR_PPP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  = f[vf::lbm::dir::DIR_MPP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  = f[vf::lbm::dir::DIR_PMP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   = f[vf::lbm::dir::DIR_MMP];
                 }
             }
         }
diff --git a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h
index 2d5216d3607e4489cc93a062f66efdb6f2c2457a..6b19ada8dbc7bd07239c4086f4ab666f1031f28d 100644
--- a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h
+++ b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h
@@ -94,15 +94,15 @@ protected:
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
 {
-    using namespace UbMath;
+    using namespace vf::lbm::constant;
     LBMReal m2 = mfa + mfc;
     LBMReal m1 = mfc - mfa;
     LBMReal m0 = m2 + mfb;
     mfa = m0;
     m0 *= Kinverse;
-    m0 += c1;
+    m0 += c1o1;
     mfb = (m1 * Kinverse - m0 * vv) * K;
-    mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 ////////////////////////////////////////////////////////////////////////////////
 //! \brief backward chimera transformation \ref backwardInverseChimeraWithK
@@ -112,10 +112,10 @@ inline void IBcumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBM
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
 {
-    using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 - vv) * c1o2) * K;
-    LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (-v2)) * K;
-    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 + vv) * c1o2) * K;
+    using namespace vf::lbm::constant;
+    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
+    LBMReal m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;
+    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
     mfa = m0;
     mfb = m1;
 }
@@ -128,10 +128,10 @@ inline void IBcumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LB
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
 {
-    using namespace UbMath;
+    using namespace vf::lbm::constant;
     LBMReal m1 = (mfa + mfc) + mfb;
     LBMReal m2 = mfc - mfa;
-    mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+    mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
     mfb = m2 - vv * m1;
     mfa = m1;
 }
@@ -144,9 +144,9 @@ inline void IBcumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, L
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
 {
-    using namespace UbMath;
+    using namespace vf::lbm::constant;
     LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-    LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+    LBMReal mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
     mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
     mfb = mb;
     mfa = ma;
diff --git a/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp b/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp
index 51d451064ff1b2e45433997b0e3e771b22f19ffb..bf923b2a216df1c6b82c9538daf4f33a65b24fd6 100644
--- a/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp
+++ b/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp
@@ -66,7 +66,7 @@ void LiggghtsCouplingCoProcessor::setSpheresOnLattice()
         if (excludeFlag)
             continue;
 
-        double x[3], v[3], omega[3];
+        double x[3] = { 0, 0, 0 }, v[3] = { 0, 0, 0 }, omega[3] = { 0, 0, 0 };
         double r;
         int id = wrapper.lmp->atom->tag[iS];
 
@@ -194,7 +194,7 @@ double LiggghtsCouplingCoProcessor::calcSolidFraction(double const dx_, double c
         return 1;
 
     double const r_sq = r_ * r_;
-    double dx_sq[slicesPerDim], dy_sq[slicesPerDim], dz_sq[slicesPerDim];
+    double dx_sq[slicesPerDim] = { 0, 0, 0, 0, 0 }, dy_sq[slicesPerDim] = { 0, 0, 0, 0, 0 }, dz_sq[slicesPerDim] = { 0, 0, 0, 0, 0 };
 
     // pre-calculate d[xyz]_sq for efficiency
     for (int i = 0; i < slicesPerDim; i++) {
@@ -255,13 +255,13 @@ void LiggghtsCouplingCoProcessor::getForcesFromLattice()
     if (nPart == 0)
         return; // no particles - no work
 
-    if (nPart > x_lb.size()) {
-        for (int iPart = 0; iPart < x_lb.size(); iPart++) {
+    if (nPart > (int)x_lb.size()) {
+        for (int iPart = 0; iPart < (int)x_lb.size(); iPart++) {
             x_lb[iPart][0] = wrapper.lmp->atom->x[iPart][0];
             x_lb[iPart][1] = wrapper.lmp->atom->x[iPart][1];
             x_lb[iPart][2] = wrapper.lmp->atom->x[iPart][2];
         }
-        for (int iPart = x_lb.size(); iPart < nPart; iPart++) {
+        for (int iPart = (int)x_lb.size(); iPart < nPart; iPart++) {
             std::array<double, 3> ar = {wrapper.lmp->atom->x[iPart][0],
                                         wrapper.lmp->atom->x[iPart][1],
                                         wrapper.lmp->atom->x[iPart][2]};
@@ -277,12 +277,12 @@ void LiggghtsCouplingCoProcessor::getForcesFromLattice()
         }
     }
 
-    if (n_force > force.size()) {
-        for (int i = 0; i < force.size(); i++) {
+    if (n_force > (int)force.size()) {
+        for (int i = 0; i < (int)force.size(); i++) {
             force[i]  = 0;
             torque[i] = 0;
         }
-        for (int i = force.size(); i < n_force; i++) {
+        for (int i = (int)force.size(); i < n_force; i++) {
             force.push_back(0.);
             torque.push_back(0.);
         }
@@ -367,17 +367,17 @@ void LiggghtsCouplingCoProcessor::SumForceTorque3D(ParticleData::ParticleDataArr
                         // minimum image convention, needed if
                         // (1) PBC are used and
                         // (2) both ends of PBC lie on the same processor
-                        if (dx > nx / 2)
+                        if ((int)dx > nx / 2)
                             dx -= nx;
-                        else if (dx < -nx / 2)
+                        else if ((int)dx < -nx / 2)
                             dx += nx;
-                        if (dy > ny / 2)
+                        if ((int)dy > ny / 2)
                             dy -= ny;
-                        else if (dy < -ny / 2)
+                        else if ((int)dy < -ny / 2)
                             dy += ny;
-                        if (dz > nz / 2)
+                        if ((int)dz > nz / 2)
                             dz -= nz;
-                        else if (dz < -nz / 2)
+                        else if ((int)dz < -nz / 2)
                             dz += nz;
 
                         double const forceX = (*particleData)(ix1, ix2, ix3)->hydrodynamicForce[0];
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h
index d66dd3bc64caac711c61f75ed92d7065baaa2699..625fb92149df067639b05435d9b8597b6f96e775 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h
@@ -57,14 +57,14 @@ public:
     virtual short getSecondaryBcOption() { return this->secondaryBcOption; }
     virtual void setSecondaryBcOption(const short &val) { this->secondaryBcOption = val; }
 
-    virtual void init(const D3Q27Interactor *const &interactor, const double &time = 0)   = 0;
-    virtual void update(const D3Q27Interactor *const &interactor, const double &time = 0) = 0;
+    virtual void init(const D3Q27Interactor *const &interactor, const real &time = 0)   = 0;
+    virtual void update(const D3Q27Interactor *const &interactor, const real &time = 0) = 0;
 
-    virtual void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                         const double &worldX2, const double &worldX3, const double &time = 0)       = 0;
+    virtual void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                         const real &worldX2, const real &worldX3, const real &time = 0)       = 0;
     virtual void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc,
-                                     const double &worldX1, const double &worldX2, const double &worldX3,
-                                     const double &q, const int &fdirection, const double &time = 0) = 0;
+                                     const real &worldX1, const real &worldX2, const real &worldX3,
+                                     const real &q, const int &fdirection, const real &time = 0) = 0;
 
     void setBcAlgorithm(SPtr<BCAlgorithm> alg)
     {
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
index 5c852528a2abe2bf8de06753f9aaa78bf7f8a565..179007cb6f3f881517c55196420c2cf7135a62f1 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
@@ -69,17 +69,17 @@ void BCAlgorithm::setCompressible(bool c)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactor(LBMReal cf) { collFactor = cf; }
+void BCAlgorithm::setCollFactor(real cf) { collFactor = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactorL(LBMReal cf) { collFactorL = cf; }
+void BCAlgorithm::setCollFactorL(real cf) { collFactorL = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactorG(LBMReal cf) { collFactorG = cf; }
+void BCAlgorithm::setCollFactorG(real cf) { collFactorG = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactorPh(LBMReal cf) { collFactorPh = cf; }
+void BCAlgorithm::setCollFactorPh(real cf) { collFactorPh = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setDensityRatio(LBMReal dr) { densityRatio = dr; }
+void BCAlgorithm::setDensityRatio(real dr) { densityRatio = dr; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setPhiBound(LBMReal phiL, LBMReal phiH)
+void BCAlgorithm::setPhiBound(real phiL, real phiH)
 {
     this->phiL = phiL;
     this->phiH = phiH;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
index f8e77af6d8280e61978740757c682fe9119d9710..fa964f96b4f05801b0dc4afc48d19a68c5b1c133 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
@@ -84,13 +84,13 @@ public:
     void setNodeIndex(int x1, int x2, int x3);
     void setBcPointer(SPtr<BoundaryConditions> bcPtr);
     void setCompressible(bool c);
-    void setCollFactor(LBMReal cf);
+    void setCollFactor(real cf);
 
-    void setCollFactorL(LBMReal cf);
-    void setCollFactorG(LBMReal cf);
-    void setCollFactorPh(LBMReal cf);
-    void setDensityRatio(LBMReal dr);
-    void setPhiBound(LBMReal phiL, LBMReal phiH);
+    void setCollFactorL(real cf);
+    void setCollFactorG(real cf);
+    void setCollFactorPh(real cf);
+    void setDensityRatio(real dr);
+    void setPhiBound(real phiL, real phiH);
 
     char getType();
     bool isPreCollision();
@@ -113,18 +113,18 @@ protected:
     SPtr<BCArray3D> bcArray;
     SPtr<Block3D> block;
 
-    LBMReal collFactor;
-    LBMReal collFactorL, collFactorG, collFactorPh;
-    LBMReal densityRatio;
-    LBMReal phiL, phiH;
+    real collFactor;
+    real collFactorL, collFactorG, collFactorPh;
+    real densityRatio;
+    real phiL, phiH;
     int x1, x2, x3;
 
-    LBMReal compressibleFactor;
+    real compressibleFactor;
 
-    using CalcMacrosFct    = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
-    using CalcFeqForDirFct = LBMReal (*)(const int &, const LBMReal &, const LBMReal &, const LBMReal &,
-                                         const LBMReal &);
-    using CalcFeqFct = void (*)(LBMReal *const &, const LBMReal &, const LBMReal &, const LBMReal &, const LBMReal &);
+    using CalcMacrosFct    = void (*)(const real *const &, real &, real &, real &, real &);
+    using CalcFeqForDirFct = real (*)(const int &, const real &, const real &, const real &,
+                                         const real &);
+    using CalcFeqFct = void (*)(real *const &, const real &, const real &, const real &, const real &);
 
     CalcFeqForDirFct calcFeqsForDirFct;
     CalcMacrosFct calcMacrosFct;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp
index 781958f858e54a348358ec11014ef1012779ebc9..286c9a9f7b9ecd131f90a8c6853ed8e250e1f262 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp
@@ -33,5 +33,5 @@
 
 #include "BCFunction.h"
 
-const double BCFunction::INFTIMEDEPENDENT = -1.0;
-const double BCFunction::INFCONST         = -10.0;
+const real BCFunction::INFTIMEDEPENDENT = -1.0;
+const real BCFunction::INFCONST         = -10.0;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h
index cf1a5a578af00fd4e326f72ac922f2f4d018667f..68f6caefcd50c32f38c9b329f94db85be3f58688 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h
@@ -35,6 +35,7 @@
 #define D3Q27BCFUNCTION_H
 
 #include <basics/utilities/UbInfinity.h>
+#include "lbm/constants/D3Q27.h"
 
 #include <muParser.h>
 
@@ -42,21 +43,21 @@
 class BCFunction
 {
 public:
-    static const double INFTIMEDEPENDENT;
-    static const double INFCONST;
+    static const real INFTIMEDEPENDENT;
+    static const real INFCONST;
 
 public:
     BCFunction() : starttime(-Ub::inf), endtime(-Ub::inf) {}
-    BCFunction(const mu::Parser &function, const double &starttime, const double &endtime)
+    BCFunction(const mu::Parser &function, const real &starttime, const real &endtime)
         : function(function), starttime(starttime), endtime(endtime)
     {
     }
-    BCFunction(const std::string &functionstring, const double &starttime, const double &endtime)
+    BCFunction(const std::string &functionstring, const real &starttime, const real &endtime)
         : starttime(starttime), endtime(endtime)
     {
         this->setFunction(functionstring);
     }
-    BCFunction(const double &velocity, const double &starttime, const double &endtime)
+    BCFunction(const real &velocity, const real &starttime, const real &endtime)
         : starttime(starttime), endtime(endtime)
     {
         this->setFunction(velocity);
@@ -64,19 +65,19 @@ public:
 
     void setFunction(const mu::Parser &function) { this->function = function; }
     void setFunction(const std::string &functionstring) { this->function.SetExpr(functionstring); }
-    void setFunction(const double &constVelocity)
+    void setFunction(const real &constVelocity)
     {
         std::stringstream dummy;
         dummy << constVelocity;
         function.SetExpr(dummy.str());
     }
-    void setStartTime(const double &starttime) { this->starttime = starttime; }
-    void setEndTime(const double &endtime) { this->endtime = endtime; }
+    void setStartTime(const real &starttime) { this->starttime = starttime; }
+    void setEndTime(const real &endtime) { this->endtime = endtime; }
 
     mu::Parser &getFunction() { return function; }
     const mu::Parser &getFunction() const { return function; }
-    const double &getStartTime() const { return starttime; }
-    const double &getEndTime() const { return endtime; }
+    const real &getStartTime() const { return starttime; }
+    const real &getEndTime() const { return endtime; }
 
     std::string toString() const
     {
@@ -109,8 +110,8 @@ public:
 
 protected:
     mu::Parser function;
-    double starttime;
-    double endtime;
+    real starttime;
+    real endtime;
 
 private:
 };
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
index fa61e7224ede371f1c28d3eab8e0ba795ccfa3b1..b5318092d225fe4ffc0e52aceb57b446acc9d014 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
@@ -192,61 +192,63 @@ public:
     float getBoundaryVelocityX3() { return this->bcVelocityX3; }
     float getBoundaryVelocity(const int &direction)
     {
+        using namespace vf::lbm::dir;
+
         switch (direction) {
-            case D3Q27System::DIR_P00:
-                return (float)(UbMath::c4o9 *
+            case DIR_P00:
+                return (float)(vf::lbm::constant::c4o9 *
                                (+bcVelocityX1)); //(2/cs^2)(=6)*rho_0(=1 bei inkompr)*wi*u*ei mit cs=1/sqrt(3)
-            case D3Q27System::DIR_M00:
-                return (float)(UbMath::c4o9 *
+            case DIR_M00:
+                return (float)(vf::lbm::constant::c4o9 *
                                (-bcVelocityX1)); // z.B. aus paper manfred MRT LB models in three dimensions (2002)
-            case D3Q27System::DIR_0P0:
-                return (float)(UbMath::c4o9 * (+bcVelocityX2));
-            case D3Q27System::DIR_0M0:
-                return (float)(UbMath::c4o9 * (-bcVelocityX2));
-            case D3Q27System::DIR_00P:
-                return (float)(UbMath::c4o9 * (+bcVelocityX3));
-            case D3Q27System::DIR_00M:
-                return (float)(UbMath::c4o9 * (-bcVelocityX3));
-            case D3Q27System::DIR_PP0:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX2));
-            case D3Q27System::DIR_MM0:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX2));
-            case D3Q27System::DIR_PM0:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX2));
-            case D3Q27System::DIR_MP0:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX2));
-            case D3Q27System::DIR_P0P:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX3));
-            case D3Q27System::DIR_M0M:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX3));
-            case D3Q27System::DIR_P0M:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX3));
-            case D3Q27System::DIR_M0P:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX3));
-            case D3Q27System::DIR_0PP:
-                return (float)(UbMath::c1o9 * (+bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_0MM:
-                return (float)(UbMath::c1o9 * (-bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_0PM:
-                return (float)(UbMath::c1o9 * (+bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_0MP:
-                return (float)(UbMath::c1o9 * (-bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_PPP:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_MMM:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_PPM:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_MMP:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_PMP:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_MPM:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_PMM:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_MPP:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            case DIR_0P0:
+                return (float)(vf::lbm::constant::c4o9 * (+bcVelocityX2));
+            case DIR_0M0:
+                return (float)(vf::lbm::constant::c4o9 * (-bcVelocityX2));
+            case DIR_00P:
+                return (float)(vf::lbm::constant::c4o9 * (+bcVelocityX3));
+            case DIR_00M:
+                return (float)(vf::lbm::constant::c4o9 * (-bcVelocityX3));
+            case DIR_PP0:
+                return (float)(vf::lbm::constant::c1o9 * (+bcVelocityX1 + bcVelocityX2));
+            case DIR_MM0:
+                return (float)(vf::lbm::constant::c1o9 * (-bcVelocityX1 - bcVelocityX2));
+            case DIR_PM0:
+                return (float)(vf::lbm::constant::c1o9 * (+bcVelocityX1 - bcVelocityX2));
+            case DIR_MP0:
+                return (float)(vf::lbm::constant::c1o9 * (-bcVelocityX1 + bcVelocityX2));
+            case DIR_P0P:
+                return (float)(vf::lbm::constant::c1o9 * (+bcVelocityX1 + bcVelocityX3));
+            case DIR_M0M:
+                return (float)(vf::lbm::constant::c1o9 * (-bcVelocityX1 - bcVelocityX3));
+            case DIR_P0M:
+                return (float)(vf::lbm::constant::c1o9 * (+bcVelocityX1 - bcVelocityX3));
+            case DIR_M0P:
+                return (float)(vf::lbm::constant::c1o9 * (-bcVelocityX1 + bcVelocityX3));
+            case DIR_0PP:
+                return (float)(vf::lbm::constant::c1o9 * (+bcVelocityX2 + bcVelocityX3));
+            case DIR_0MM:
+                return (float)(vf::lbm::constant::c1o9 * (-bcVelocityX2 - bcVelocityX3));
+            case DIR_0PM:
+                return (float)(vf::lbm::constant::c1o9 * (+bcVelocityX2 - bcVelocityX3));
+            case DIR_0MP:
+                return (float)(vf::lbm::constant::c1o9 * (-bcVelocityX2 + bcVelocityX3));
+            case DIR_PPP:
+                return (float)(vf::lbm::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            case DIR_MMM:
+                return (float)(vf::lbm::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            case DIR_PPM:
+                return (float)(vf::lbm::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            case DIR_MMP:
+                return (float)(vf::lbm::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            case DIR_PMP:
+                return (float)(vf::lbm::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            case DIR_MPM:
+                return (float)(vf::lbm::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            case DIR_PMM:
+                return (float)(vf::lbm::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            case DIR_MPP:
+                return (float)(vf::lbm::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
             default:
                 throw UbException(UB_EXARGS, "unknown error");
         }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp
index b11e303d82f67b107b7c9b9f427d7fa3fb71eb79..86ac7726170c2322e2749f1d4cfaa92033cf7ff9 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp
@@ -36,7 +36,7 @@
 
 using namespace std;
 /*==========================================================*/
-DensityBCAdapter::DensityBCAdapter(const double &dens, const double &startTime, const double &endTime)
+DensityBCAdapter::DensityBCAdapter(const real &dens, const real &startTime, const real &endTime)
 {
     this->densBCs.emplace_back(dens, startTime, endTime);
     this->init();
@@ -54,7 +54,7 @@ DensityBCAdapter::DensityBCAdapter(const std::vector<BCFunction> &densBCs)
     this->init();
 }
 /*==========================================================*/
-DensityBCAdapter::DensityBCAdapter(const mu::Parser &function, const double &startTime, const double &endTime)
+DensityBCAdapter::DensityBCAdapter(const mu::Parser &function, const real &startTime, const real &endTime)
 {
     this->densBCs.emplace_back(function, startTime, endTime);
     this->init();
@@ -96,11 +96,11 @@ void DensityBCAdapter::init()
     }
 }
 /*==========================================================*/
-void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const double &time)
+void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const real &time)
 {
     this->timeStep           = time;
     this->tmpDensityFunction = NULL;
-    double maxEndtime        = -Ub::inf;
+    real maxEndtime        = -Ub::inf;
 
     // aktuelle Densityfunction bestimmen
     for (size_t pos = 0; pos < densBCs.size(); ++pos) {
@@ -111,8 +111,8 @@ void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const
 
         if (UbMath::greaterEqual(this->timeStep, densBCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, densBCs[pos].getEndTime()) ||
-                UbMath::equal(densBCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(densBCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(densBCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(densBCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpDensityFunction = &densBCs[pos].getFunction();
                 break;
             }
@@ -130,30 +130,30 @@ void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const
                          << "\", timedependant=" << (this->isTimeDependent() ? "true" : "false"));
 }
 /*==========================================================*/
-void DensityBCAdapter::update(const D3Q27Interactor *const &interactor, const double &time)
+void DensityBCAdapter::update(const D3Q27Interactor *const &interactor, const real &time)
 {
     this->init(interactor, time);
 }
 /*==========================================================*/
 void DensityBCAdapter::adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                           const double & /*worldX1*/, const double & /*worldX2*/,
-                                           const double & /*worldX3*/, const double &q, const int &fdirection,
-                                           const double & /*time*/)
+                                           const real & /*worldX1*/, const real & /*worldX2*/,
+                                           const real & /*worldX3*/, const real &q, const int &fdirection,
+                                           const real & /*time*/)
 {
     bc->setDensityBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-    bc->setQ((float)q, fdirection);
+    bc->setQ((real)q, fdirection);
 }
 /*==========================================================*/
-void DensityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                               const double &worldX2, const double &worldX3, const double &time)
+void DensityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                               const real &worldX2, const real &worldX3, const real &time)
 {
     this->setNodeDensity(interactor, bc, worldX1, worldX2, worldX3, time);
     bc->setBcAlgorithmType(algorithmType);
 }
 /*==========================================================*/
 void DensityBCAdapter::setNodeDensity(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                      const double &worldX1, const double &worldX2, const double &worldX3,
-                                      const double &timestep)
+                                      const real &worldX1, const real &worldX2, const real &worldX3,
+                                      const real &timestep)
 {
     // Geschwindigkeiten setzen
     try {
@@ -164,7 +164,7 @@ void DensityBCAdapter::setNodeDensity(const D3Q27Interactor & /*interactor*/, SP
         this->timeStep = timestep;
 
         if (tmpDensityFunction)
-            bc->setBoundaryDensity((float)tmpDensityFunction->Eval());
+            bc->setBoundaryDensity((real)tmpDensityFunction->Eval());
     } catch (mu::Parser::exception_type &e) {
         stringstream error;
         error << "mu::parser exception occurs, message(" << e.GetMsg() << "), formula("
@@ -176,7 +176,7 @@ void DensityBCAdapter::setNodeDensity(const D3Q27Interactor & /*interactor*/, SP
     }
 }
 /*==========================================================*/
-double DensityBCAdapter::getDensity(const double &x1, const double &x2, const double &x3, const double &timeStep)
+real DensityBCAdapter::getDensity(const real &x1, const real &x2, const real &x3, const real &timeStep)
 {
     this->x1       = x1;
     this->x2       = x2;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h
index 5425d9fcbb57f18ca5e5e57d02133ef8f2a9b8f2..74bfea4dd533ca8bbe81a5941ab302e3ffb06a95 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h
@@ -39,7 +39,7 @@
 #include <string>
 #include <vector>
 
-#include "basics/utilities/UbMath.h"
+//#include "basics/utilities/UbMath.h"
 #include "basics/utilities/UbTuple.h"
 
 #include "BCAdapter.h"
@@ -62,25 +62,25 @@ class DensityBCAdapter : public BCAdapter
 public:
     // constructors
     DensityBCAdapter() { this->init(); }
-    DensityBCAdapter(const double &dens, const double &startTime = 0.0, const double &endTime = BCFunction::INFCONST);
+    DensityBCAdapter(const real &dens, const real &startTime = 0.0, const real &endTime = BCFunction::INFCONST);
     DensityBCAdapter(const BCFunction &densBC);
     DensityBCAdapter(const std::vector<BCFunction> &densBCs);
-    DensityBCAdapter(const mu::Parser &function, const double &startTime = 0.0,
-                     const double &endTime = BCFunction::INFCONST);
+    DensityBCAdapter(const mu::Parser &function, const real &startTime = 0.0,
+                     const real &endTime = BCFunction::INFCONST);
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- start
     std::string toString();
 
-    void init(const D3Q27Interactor *const &interactor, const double &time = 0) override;
-    void update(const D3Q27Interactor *const &interactor, const double &time = 0) override;
+    void init(const D3Q27Interactor *const &interactor, const real &time = 0) override;
+    void update(const D3Q27Interactor *const &interactor, const real &time = 0) override;
 
-    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                             const double &worldX2, const double &worldX3, const double &q, const int &fdirection,
-                             const double &time = 0) override;
-    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                 const double &worldX2, const double &worldX3, const double &time = 0) override;
+    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                             const real &worldX2, const real &worldX3, const real &q, const int &fdirection,
+                             const real &time = 0) override;
+    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                 const real &worldX2, const real &worldX3, const real &time = 0) override;
 
-    double getDensity(const double &x1, const double &x2, const double &x3, const double &timeStep);
+    real getDensity(const real &x1, const real &x2, const real &x3, const real &timeStep);
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- end
 
@@ -92,8 +92,8 @@ protected:
     void unsetTimeDependent() { (this->type &= ~TIMEDEPENDENT); }
 
     void clear() { densBCs.clear(); }
-    void setNodeDensity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                        const double &worldX2, const double &worldX3, const double &timestep);
+    void setNodeDensity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                        const real &worldX2, const real &worldX3, const real &timestep);
 
 private:
     mu::value_type x1, x2, x3; // brauch man nicht serialisieren!
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp
index bdddd2369377f1e2b30c86eb243bf4d4a843e06c..fa5dc1bdeff9112a7a0c1a26b9c52ee5f27012a5 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp
@@ -55,7 +55,9 @@ void EqDensityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributi
 //////////////////////////////////////////////////////////////////////////
 void EqDensityBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
+    using namespace vf::lbm::dir;
+
+    real f[D3Q27System::ENDF + 1];
 
     distributions->getDistributionInv(f, x1, x2, x3);
     int nx1 = x1;
@@ -63,28 +65,28 @@ void EqDensityBCAlgorithm::applyBC()
     int nx3 = x3;
 
     // flag points in direction of fluid
-    if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_P00)) {
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
         nx1 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_M00)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
         nx1 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0P0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
         nx2 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0M0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
         nx2 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00P)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
         nx3 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00M)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
         nx3 += 1;
     } else
         UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
 
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
-    LBMReal rhoBC = bcPtr->getBoundaryDensity();
+    real rhoBC = bcPtr->getBoundaryDensity();
     for (int fdir = D3Q27System::STARTF; fdir <= D3Q27System::ENDF; fdir++) {
         if (bcPtr->hasDensityBoundaryFlag(fdir)) {
             // Ehsan: 15.2.2013:
-            LBMReal ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3);
+            real ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3);
             distributions->setDistributionForDirection(ftemp, nx1, nx2, nx3, fdir);
         }
     }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp
index 9d14940929d45bf70268ed415f4d02457a7c09fc..3ed53ee85f2047cedc5cdc6eb71f607ca8792b6f 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp
@@ -55,10 +55,10 @@ void HighViscosityNoSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void HighViscosityNoSlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistribution(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     calcFeqFct(feq, rho, vx1, vx2, vx3);
 
@@ -66,8 +66,8 @@ void HighViscosityNoSlipBCAlgorithm::applyBC()
         if (bcPtr->hasNoSlipBoundaryFlag(fDir)) {
             // quadratic bounce back
             const int invDir = D3Q27System::INVDIR[fDir];
-            LBMReal q        = bcPtr->getQ(invDir);
-            LBMReal fReturn =
+            real q        = bcPtr->getQ(invDir);
+            real fReturn =
                 (f[invDir] + q * f[fDir] + q * collFactor * (feq[invDir] - f[invDir] + feq[fDir] - f[fDir])) /
                 (1.0 + q);
             distributions->setDistributionInvForDirection(fReturn, x1 + D3Q27System::DX1[invDir],
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp
index aafa0da55a085b1025c693cf29bfb18730b92882..73e399fb6ab3df0ea06620da4b5c0f6fedc8428e 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp
@@ -64,9 +64,9 @@ void MultiphaseNoSlipBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> di
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseNoSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal h[D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
+   real f[D3Q27System::ENDF+1];
+   real h[D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
    //LBMReal feq[D3Q27System::ENDF+1];
    //LBMReal heq[D3Q27System::ENDF+1];
    distributions ->getDistributionInv(f, x1, x2, x3);
@@ -87,15 +87,15 @@ void MultiphaseNoSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-		 LBMReal fReturn = f[invDir];
+		 real fReturn = f[invDir];
          //distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
          distributions->setDistributionForDirection(fReturn, x1, x2, x3, invDir);//delay BB 
-         LBMReal hReturn = h[invDir];
+         real hReturn = h[invDir];
 		// distributionsH->setDistributionForDirection(hReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
          distributionsH->setDistributionForDirection(hReturn, x1, x2, x3, invDir);//delay BB  
          if (distributionsH2)
          {
-             LBMReal h2Return = h2[invDir];
+             real h2Return = h2[invDir];
              distributionsH2->setDistributionForDirection(h2Return, x1, x2, x3, invDir);//delay BB
             // distributionsH2->setDistributionForDirection(h2Return, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp
index 9d1648ae2b751f304e99b064025f7a7c87a06e28..0a334b37c6096a285ea7dd25b56790de0222098e 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp
@@ -70,13 +70,16 @@ void MultiphaseNonReflectingOutflowBCAlgorithm::addDistributionsH2(SPtr<Distribu
 void MultiphaseNonReflectingOutflowBCAlgorithm::applyBC()
 {
     using namespace D3Q27System;
-    using namespace UbMath;
-    LBMReal f[ENDF + 1];
-    LBMReal ftemp[ENDF + 1];
-    LBMReal h[D3Q27System::ENDF + 1];
-    LBMReal htemp[ENDF + 1];
-    LBMReal h2[D3Q27System::ENDF + 1];
-    LBMReal h2temp[ENDF + 1];
+//    using namespace UbMath;
+    using namespace vf::lbm::dir;
+    using namespace vf::lbm::constant;
+
+    real f[ENDF + 1];
+    real ftemp[ENDF + 1];
+    real h[D3Q27System::ENDF + 1];
+    real htemp[ENDF + 1];
+    real h2[D3Q27System::ENDF + 1];
+    real h2temp[ENDF + 1];
 
     int nx1 = x1;
     int nx2 = x2;
@@ -112,7 +115,7 @@ void MultiphaseNonReflectingOutflowBCAlgorithm::applyBC()
     distributionsH2->getDistribution(h2, x1, x2, x3);
     distributionsH2->getDistribution(h2temp, nx1, nx2, nx3);
 
-    LBMReal /* phi,*/ p1, vx1, vx2, vx3;
+    real /* phi,*/ p1, vx1, vx2, vx3;
 
     // D3Q27System::calcDensity(h, phi);
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp
index 230a543f120a8ca8d18c5d2bb6a1c27e550aae92..19fbcc77d39f644addbc6ce79c24f369144dca8a 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp
@@ -64,17 +64,19 @@ void MultiphaseSlipBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> dist
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal h[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal heq[D3Q27System::ENDF+1];
+    using namespace vf::lbm::dir;
+
+   real f[D3Q27System::ENDF+1];
+   real h[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
+   real heq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
    distributionsH->getDistributionInv(h, x1, x2, x3);
 
-   LBMReal p1, vx1, vx2, vx3, phi, rho;
+   real p1, vx1, vx2, vx3, phi, rho;
 
    D3Q27System::calcDensity(h, phi);
-   //LBMReal collFactorM = collFactorL + (collFactorL - collFactorG)*(phi - phiH)/(phiH - phiL);
+   //real collFactorM = collFactorL + (collFactorL - collFactorG)*(phi - phiH)/(phiH - phiL);
 
 
    calcMacrosFct(f, p1, vx1, vx2, vx3);
@@ -82,7 +84,7 @@ void MultiphaseSlipBCAlgorithm::applyBC()
    D3Q27System::calcMultiphaseHeq(heq, phi, vx1, vx2, vx3); 
 
    UbTupleFloat3 normale = bcPtr->getNormalVector();
-   LBMReal amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
+   real amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
 
    vx1 = vx1 - amp * val<1>(normale); //normale zeigt von struktur weg!
    vx2 = vx2 - amp * val<2>(normale); //normale zeigt von struktur weg!
@@ -97,44 +99,44 @@ void MultiphaseSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+         real q = bcPtr->getQ(invDir);// m+m q=0 stabiler
          //vx3=0;
-         LBMReal velocity = 0.0;
+         real velocity = 0.0;
          switch (invDir)
          {
-         case D3Q27System::DIR_P00: velocity = (UbMath::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
-         case D3Q27System::DIR_M00: velocity = (UbMath::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
-         case D3Q27System::DIR_0P0: velocity = (UbMath::c4o9*(+vx2)); break;
-         case D3Q27System::DIR_0M0: velocity = (UbMath::c4o9*(-vx2)); break;
-         case D3Q27System::DIR_00P: velocity = (UbMath::c4o9*(+vx3)); break;
-         case D3Q27System::DIR_00M: velocity = (UbMath::c4o9*(-vx3)); break;
-         case D3Q27System::DIR_PP0: velocity = (UbMath::c1o9*(+vx1+vx2)); break;
-         case D3Q27System::DIR_MM0: velocity = (UbMath::c1o9*(-vx1-vx2)); break;
-         case D3Q27System::DIR_PM0: velocity = (UbMath::c1o9*(+vx1-vx2)); break;
-         case D3Q27System::DIR_MP0: velocity = (UbMath::c1o9*(-vx1+vx2)); break;
-         case D3Q27System::DIR_P0P: velocity = (UbMath::c1o9*(+vx1             +vx3)); break;
-         case D3Q27System::DIR_M0M: velocity = (UbMath::c1o9*(-vx1             -vx3)); break;
-         case D3Q27System::DIR_P0M: velocity = (UbMath::c1o9*(+vx1             -vx3)); break;
-         case D3Q27System::DIR_M0P: velocity = (UbMath::c1o9*(-vx1             +vx3)); break;
-         case D3Q27System::DIR_0PP: velocity = (UbMath::c1o9*(+vx2+vx3)); break;
-         case D3Q27System::DIR_0MM: velocity = (UbMath::c1o9*(-vx2-vx3)); break;
-         case D3Q27System::DIR_0PM: velocity = (UbMath::c1o9*(+vx2-vx3)); break;
-         case D3Q27System::DIR_0MP: velocity = (UbMath::c1o9*(-vx2+vx3)); break;
-         case D3Q27System::DIR_PPP: velocity = (UbMath::c1o36*(+vx1+vx2+vx3)); break;
-         case D3Q27System::DIR_MMM: velocity = (UbMath::c1o36*(-vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_PPM: velocity = (UbMath::c1o36*(+vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_MMP: velocity = (UbMath::c1o36*(-vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_PMP: velocity = (UbMath::c1o36*(+vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_MPM: velocity = (UbMath::c1o36*(-vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_PMM: velocity = (UbMath::c1o36*(+vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_MPP: velocity = (UbMath::c1o36*(-vx1+vx2+vx3)); break;
+         case DIR_P00: velocity = (vf::lbm::constant::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
+         case DIR_M00: velocity = (vf::lbm::constant::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
+         case DIR_0P0: velocity = (vf::lbm::constant::c4o9*(+vx2)); break;
+         case DIR_0M0: velocity = (vf::lbm::constant::c4o9*(-vx2)); break;
+         case DIR_00P: velocity = (vf::lbm::constant::c4o9*(+vx3)); break;
+         case DIR_00M: velocity = (vf::lbm::constant::c4o9*(-vx3)); break;
+         case DIR_PP0: velocity = (vf::lbm::constant::c1o9*(+vx1+vx2)); break;
+         case DIR_MM0: velocity = (vf::lbm::constant::c1o9*(-vx1-vx2)); break;
+         case DIR_PM0: velocity = (vf::lbm::constant::c1o9*(+vx1-vx2)); break;
+         case DIR_MP0: velocity = (vf::lbm::constant::c1o9*(-vx1+vx2)); break;
+         case DIR_P0P: velocity = (vf::lbm::constant::c1o9*(+vx1+vx3)); break;
+         case DIR_M0M: velocity = (vf::lbm::constant::c1o9*(-vx1-vx3)); break;
+         case DIR_P0M: velocity = (vf::lbm::constant::c1o9*(+vx1-vx3)); break;
+         case DIR_M0P: velocity = (vf::lbm::constant::c1o9*(-vx1+vx3)); break;
+         case DIR_0PP: velocity = (vf::lbm::constant::c1o9*(+vx2+vx3)); break;
+         case DIR_0MM: velocity = (vf::lbm::constant::c1o9*(-vx2-vx3)); break;
+         case DIR_0PM: velocity = (vf::lbm::constant::c1o9*(+vx2-vx3)); break;
+         case DIR_0MP: velocity = (vf::lbm::constant::c1o9*(-vx2+vx3)); break;
+         case DIR_PPP: velocity = (vf::lbm::constant::c1o36*(+vx1+vx2+vx3)); break;
+         case DIR_MMM: velocity = (vf::lbm::constant::c1o36*(-vx1-vx2-vx3)); break;
+         case DIR_PPM: velocity = (vf::lbm::constant::c1o36*(+vx1+vx2-vx3)); break;
+         case DIR_MMP: velocity = (vf::lbm::constant::c1o36*(-vx1-vx2+vx3)); break;
+         case DIR_PMP: velocity = (vf::lbm::constant::c1o36*(+vx1-vx2+vx3)); break;
+         case DIR_MPM: velocity = (vf::lbm::constant::c1o36*(-vx1+vx2-vx3)); break;
+         case DIR_PMM: velocity = (vf::lbm::constant::c1o36*(+vx1-vx2-vx3)); break;
+         case DIR_MPP: velocity = (vf::lbm::constant::c1o36*(-vx1+vx2+vx3)); break;
          default: throw UbException(UB_EXARGS, "unknown error");
          }
-         LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
+         real fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
 
-		 //LBMReal hReturn = ((1.0-q)/(1.0+q))*((h[invDir]-heq[invDir])/(1.0-collFactorPh)+heq[invDir])+((q/(1.0+q))*(h[invDir]+h[fdir]));
-		 LBMReal hReturn = h[invDir];
+		 //real hReturn = ((1.0-q)/(1.0+q))*((h[invDir]-heq[invDir])/(1.0-collFactorPh)+heq[invDir])+((q/(1.0+q))*(h[invDir]+h[fdir]));
+		 real hReturn = h[invDir];
 		 distributionsH->setDistributionForDirection(hReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp
index 7211bc3725b4a2607dc000c739f4bf8e98865013..ee8761f98fe151282cedd24b07b4a608f8ad3873 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp
@@ -47,7 +47,7 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const LBMReal& phiBC, const double& startTime, const double& endTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const real& phiBC, const real& startTime, const real& endTime )
 {
    if(vx1) this->vx1BCs.push_back(BCFunction(function,startTime,endTime));
    if(vx2) this->vx2BCs.push_back(BCFunction(function,startTime,endTime));
@@ -58,7 +58,7 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const
 
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const LBMReal& phiBC, const double& startTime, const double& endTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const real& phiBC, const real& startTime, const real& endTime )
 {
    if(vx1) this->vx1BCs.push_back(BCFunction(function1,startTime,endTime));
    if(vx2) this->vx2BCs.push_back(BCFunction(function2,startTime,endTime));
@@ -67,7 +67,7 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const string& functionstring, const double& startTime, const double& endTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const string& functionstring, const real& startTime, const real& endTime )
 {
    if(vx1) this->vx1BCs.push_back(BCFunction(functionstring,startTime,endTime));
    if(vx2) this->vx2BCs.push_back(BCFunction(functionstring,startTime,endTime));
@@ -99,9 +99,9 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const vector< BCFunctio
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                                               const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                                               const double& vx3, const double& vx3StartTime, const double& vx3EndTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                                               const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                                               const real& vx3, const real& vx3StartTime, const real& vx3EndTime )
 {
    this->vx1BCs.push_back(BCFunction(vx1,vx1StartTime,vx1EndTime));
    this->vx2BCs.push_back(BCFunction(vx2,vx2StartTime,vx2EndTime));
@@ -109,9 +109,9 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const double& vx1, cons
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const string& vx1Function, const double& vx1StartTime, const double& vx1EndTime,
-                                               const string& vx2Function, const double& vx2StartTime, const double& vx2EndTime,
-                                               const string& vx3Function, const double& vx3StartTime, const double& vx3EndTime ) 
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const string& vx1Function, const real& vx1StartTime, const real& vx1EndTime,
+                                               const string& vx2Function, const real& vx2StartTime, const real& vx2EndTime,
+                                               const string& vx3Function, const real& vx3StartTime, const real& vx3EndTime ) 
 {
    if(vx1Function.size()) this->vx1BCs.push_back(BCFunction(vx1Function,vx1StartTime,vx1EndTime));
    if(vx2Function.size()) this->vx2BCs.push_back(BCFunction(vx2Function,vx2StartTime,vx2EndTime));
@@ -119,9 +119,9 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const string& vx1Functi
    this->init();
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::setNewVelocities(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                                              const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                                              const double& vx3, const double& vx3StartTime, const double& vx3EndTime )
+void MultiphaseVelocityBCAdapter::setNewVelocities(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                                              const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                                              const real& vx3, const real& vx3StartTime, const real& vx3EndTime )
 {
    this->clear();
    this->vx1BCs.push_back(BCFunction(vx1,vx1StartTime,vx1EndTime));
@@ -174,13 +174,13 @@ void MultiphaseVelocityBCAdapter::init(std::vector<BCFunction>& vxBCs)
    }
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor, const double& time)
+void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor, const real& time)
 {
    this->timeStep       = time;
    this->tmpVx1Function = this->tmpVx2Function = this->tmpVx3Function = NULL;
 
    //aktuelle velocityfunction bestimmen
-   double maxEndtime = -Ub::inf;
+   real maxEndtime = -Ub::inf;
    
    for(size_t pos=0; pos<vx1BCs.size(); ++pos)
    {
@@ -190,8 +190,8 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
       if( UbMath::greaterEqual(this->timeStep,vx1BCs[pos].getStartTime()) ) 
       {
           if(   UbMath::lessEqual( this->timeStep     , vx1BCs[pos].getEndTime()     )
-             || UbMath::equal(     vx1BCs[pos].getEndTime(), (double)BCFunction::INFCONST        )
-             || UbMath::equal(     vx1BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)  )
+             || UbMath::equal(     vx1BCs[pos].getEndTime(), (real)BCFunction::INFCONST        )
+             || UbMath::equal(     vx1BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)  )
          {
             tmpVx1Function = &vx1BCs[pos].getFunction();
             break;
@@ -206,8 +206,8 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
       if( UbMath::greaterEqual(this->timeStep,vx2BCs[pos].getStartTime()) ) 
       {
          if(   UbMath::lessEqual( this->timeStep     , vx2BCs[pos].getEndTime()      )
-            || UbMath::equal(     vx2BCs[pos].getEndTime(), (double)BCFunction::INFCONST         )
-            || UbMath::equal(     vx2BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT )  )
+            || UbMath::equal(     vx2BCs[pos].getEndTime(), (real)BCFunction::INFCONST         )
+            || UbMath::equal(     vx2BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT )  )
          {
             tmpVx2Function = &vx2BCs[pos].getFunction();
             break;
@@ -222,8 +222,8 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
       if( UbMath::greaterEqual(this->timeStep,vx3BCs[pos].getStartTime()) ) 
       {
          if(   UbMath::lessEqual( this->timeStep     , vx3BCs[pos].getEndTime()      )
-            || UbMath::equal(     vx3BCs[pos].getEndTime(), (double)BCFunction::INFCONST         )
-            || UbMath::equal(     vx3BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT )  )
+            || UbMath::equal(     vx3BCs[pos].getEndTime(), (real)BCFunction::INFCONST         )
+            || UbMath::equal(     vx3BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT )  )
          {
             tmpVx3Function = &vx3BCs[pos].getFunction();
             break;
@@ -266,24 +266,24 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
                    <<", timedependent="<<boolalpha<<this->isTimeDependent()   );
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::update( const D3Q27Interactor* const& interactor, const double& time ) 
+void MultiphaseVelocityBCAdapter::update( const D3Q27Interactor* const& interactor, const real& time ) 
 {
    this->init(interactor,time);
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::adaptBCForDirection( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& q, const int& fdirection, const double& time )
+void MultiphaseVelocityBCAdapter::adaptBCForDirection( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& q, const int& fdirection, const real& time )
 {
    bc->setVelocityBoundaryFlag(D3Q27System::INVDIR[fdirection],secondaryBcOption);
-   bc->setQ((float)q,fdirection);
+   bc->setQ((real)q,fdirection);
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::adaptBC( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& time ) 
+void MultiphaseVelocityBCAdapter::adaptBC( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& time ) 
 {
    this->setNodeVelocity(interactor,bc,worldX1,worldX2,worldX3,time);
    bc->setBcAlgorithmType(algorithmType);
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& timestep) 
+void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& timestep) 
 {
    //Geschwindigkeiten setzen
    try
@@ -294,9 +294,9 @@ void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& intera
       this->x3 = worldX3;
       this->timeStep = timestep;
 
-      if(tmpVx1Function) bc->setBoundaryVelocityX1((float)tmpVx1Function->Eval());  
-      if(tmpVx2Function) bc->setBoundaryVelocityX2((float)tmpVx2Function->Eval());
-      if(tmpVx3Function) bc->setBoundaryVelocityX3((float)tmpVx3Function->Eval());
+      if(tmpVx1Function) bc->setBoundaryVelocityX1((real)tmpVx1Function->Eval());  
+      if(tmpVx2Function) bc->setBoundaryVelocityX2((real)tmpVx2Function->Eval());
+      if(tmpVx3Function) bc->setBoundaryVelocityX3((real)tmpVx3Function->Eval());
 	  bc->setBoundaryPhaseField(this->phiBC);
    }
    catch(mu::Parser::exception_type& e){ stringstream error; error<<"mu::parser exception occurs, message("<<e.GetMsg()<<"), formula("<<e.GetExpr()+"), token("+e.GetToken()<<")"
@@ -304,11 +304,11 @@ void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& intera
    catch(...)                          { throw UbException(UB_EXARGS,"unknown exception" ); }
 }
 /*==========================================================*/
-UbTupleDouble3 MultiphaseVelocityBCAdapter::getVelocity(const double& x1, const double& x2, const double& x3, const double& timeStep) const
+UbTupleDouble3 MultiphaseVelocityBCAdapter::getVelocity(const real& x1, const real& x2, const real& x3, const real& timeStep) const
 {
-	double vx1 = 0.0;
-	double vx2 = 0.0;
-	double vx3 = 0.0;
+	real vx1 = 0.0;
+	real vx2 = 0.0;
+	real vx3 = 0.0;
    this->x1 = x1;
    this->x2 = x2;
    this->x3 = x3;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h
index 998462398294d23725ec9985a8fcbd06fe9f65ad..60d93cdecd141834b9800c08fc9b6d1e4fab3c92 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h
@@ -92,11 +92,11 @@ public:
    
    MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const BCFunction& velVxBC );
 
-   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const LBMReal& phiBC, const double& startTime, const double& endTime  );
+   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const real& phiBC, const real& startTime, const real& endTime  );
 
-   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const LBMReal& phiBC, const double& startTime, const double& endTime );
+   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const real& phiBC, const real& startTime, const real& endTime );
    
-   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const std::string& functionstring, const double& startTime, const double& endTime );
+   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const std::string& functionstring, const real& startTime, const real& endTime );
 
    MultiphaseVelocityBCAdapter(const BCFunction& velBC, bool x1Dir, bool x2Dir, bool x3Dir);
 
@@ -104,13 +104,13 @@ public:
 
    MultiphaseVelocityBCAdapter(const std::vector< BCFunction >& velVx1BCs, const std::vector< BCFunction >& velVx2BCs, const std::vector< BCFunction >& velVx3BCs);
 
-   MultiphaseVelocityBCAdapter(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                          const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                          const double& vx3, const double& vx3StartTime, const double& vx3EndTime);
+   MultiphaseVelocityBCAdapter(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                          const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                          const real& vx3, const real& vx3StartTime, const real& vx3EndTime);
 
-   MultiphaseVelocityBCAdapter(const std::string& vx1Function, const double& vx1StartTime, const double& vx1EndTime,
-                          const std::string& vx2Function, const double& vx2StartTime, const double& vx2EndTime,
-                          const std::string& vx3Function, const double& vx3StartTime, const double& vx3EndTime ); 
+   MultiphaseVelocityBCAdapter(const std::string& vx1Function, const real& vx1StartTime, const real& vx1EndTime,
+                          const std::string& vx2Function, const real& vx2StartTime, const real& vx2EndTime,
+                          const std::string& vx3Function, const real& vx3StartTime, const real& vx3EndTime ); 
 
    //methods
    void setTimePeriodic()    { (this->type |=   TIMEPERIODIC); }
@@ -118,26 +118,26 @@ public:
    bool isTimePeriodic()     { return ((this->type & TIMEPERIODIC) ==  TIMEPERIODIC); }
 
    //folgendes ist fuer moving objects gedadacht... 
-   void setNewVelocities(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                         const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                         const double& vx3, const double& vx3StartTime, const double& vx3EndTime);
+   void setNewVelocities(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                         const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                         const real& vx3, const real& vx3StartTime, const real& vx3EndTime);
 
       
    //------------- implements D3Q27BoundaryConditionAdapter ----- start
    std::string toString();
    
-   void init(const D3Q27Interactor* const& interactor, const double& time=0);
-   void update(const D3Q27Interactor* const& interactor, const double& time=0);
+   void init(const D3Q27Interactor* const& interactor, const real& time=0);
+   void update(const D3Q27Interactor* const& interactor, const real& time=0);
 
-   void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                            const double &worldX2, const double &worldX3, const double &q, const int &fdirection,
-                            const double &time = 0);
-   void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                const double &worldX2, const double &worldX3, const double &time = 0);
+   void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                            const real &worldX2, const real &worldX3, const real &q, const int &fdirection,
+                            const real &time = 0);
+   void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                const real &worldX2, const real &worldX3, const real &time = 0);
 
    //------------- implements D3Q27BoundaryConditionAdapter ----- end
 
-   UbTupleDouble3 getVelocity(const double& x1, const double& x2, const double& x3, const double& timeStep) const;
+   UbTupleDouble3 getVelocity(const real& x1, const real& x2, const real& x3, const real& timeStep) const;
 
 
 protected:
@@ -149,7 +149,7 @@ protected:
    void unsetTimeDependent() { (this->type &=  ~TIMEDEPENDENT); }
 
    void clear() { vx1BCs.clear(); vx2BCs.clear();  vx3BCs.clear(); this->init(); }
-   void setNodeVelocity(const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& timestep);
+   void setNodeVelocity(const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& timestep);
 
 private:
    mutable mu::value_type x1, x2, x3;
@@ -163,7 +163,7 @@ private:
    std::vector<BCFunction> vx2BCs;
    std::vector<BCFunction> vx3BCs;
 
-   LBMReal phiBC;
+   real phiBC;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp
index 0177be9e1aedb08adab9919429dbc0d17654e3a7..b6ddf4b46925e770cfcdcc5390d41ed816b992bc 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp
@@ -68,18 +68,20 @@ void MultiphaseVelocityBCAlgorithm::addDistributionsH2(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseVelocityBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal h[D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal heq[D3Q27System::ENDF+1];
-   LBMReal htemp[D3Q27System::ENDF+1];
+    using namespace vf::lbm::dir;
+
+   real f[D3Q27System::ENDF+1];
+   real h[D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real feq[D3Q27System::ENDF+1];
+   real heq[D3Q27System::ENDF+1];
+   real htemp[D3Q27System::ENDF+1];
    
    distributions->getDistributionInv(f, x1, x2, x3);
    distributionsH->getDistributionInv(h, x1, x2, x3);
    if (distributionsH2)
        distributionsH2->getDistributionInv(h2, x1, x2, x3);
-   LBMReal phi, vx1, vx2, vx3, p1, phiBC;
+   real phi, vx1, vx2, vx3, p1, phiBC;
    
    D3Q27System::calcDensity(h, phi);
 
@@ -98,12 +100,12 @@ void MultiphaseVelocityBCAlgorithm::applyBC()
    int nx3 = x3;
 
    //flag points in direction of fluid
-   if      (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+   if      (bcPtr->hasVelocityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00M)) { nx3 += 1; }
    //else UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on velocity boundary..."));
    
    phiBC = bcPtr->getBoundaryPhaseField();
@@ -129,14 +131,14 @@ void MultiphaseVelocityBCAlgorithm::applyBC()
       {
          const int invDir = D3Q27System::INVDIR[fdir];
          //LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
-         LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
+         real velocity = bcPtr->getBoundaryVelocity(invDir);
 		 //16.03.2021 quick fix for velocity BC
-         LBMReal fReturn = f[invDir] - velocity;
+         real fReturn = f[invDir] - velocity;
          //LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity)/(1.0+q));
         // distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);//no delay BB
          distributions->setDistributionForDirection(fReturn, x1, x2, x3, invDir);//delay BB  
 
-         LBMReal hReturn = htemp[invDir] + h[invDir] - heq[invDir] - velocity*phi;
+         real hReturn = htemp[invDir] + h[invDir] - heq[invDir] - velocity*phi;
          distributionsH->setDistributionForDirection(hReturn, x1, x2, x3, invDir);//delay BB  
          if (distributionsH2) {
              fReturn = h2[invDir] ;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h
index 68ebf73ffeafe88f9184c46a1144840fae8b27e1..52eda33082e8031454b00f578a6f520b738c0d42 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h
@@ -43,18 +43,18 @@ public:
     NoSlipBCAdapter() : BCAdapter() {}
     NoSlipBCAdapter(const short &secondaryBcOption) : BCAdapter(secondaryBcOption) {}
 
-    void init(const D3Q27Interactor *const &interactor, const double &time = 0) override {}
-    void update(const D3Q27Interactor *const &interactor, const double &time = 0) override {}
+    void init(const D3Q27Interactor *const &interactor, const real &time = 0) override {}
+    void update(const D3Q27Interactor *const &interactor, const real &time = 0) override {}
 
     void adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                             const double & /*worldX1*/, const double & /*worldX2*/, const double & /*worldX3*/,
-                             const double &q, const int &fdirection, const double & /*time*/ = 0) override
+                             const real & /*worldX1*/, const real & /*worldX2*/, const real & /*worldX3*/,
+                             const real &q, const int &fdirection, const real & /*time*/ = 0) override
     {
         bc->setNoSlipBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-        bc->setQ((float)q, fdirection);
+        bc->setQ((real)q, fdirection);
     }
-    void adaptBC(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc, const double & /*worldX1*/,
-                 const double & /*worldX2*/, const double & /*worldX3*/, const double & /*time*/ = 0) override
+    void adaptBC(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc, const real & /*worldX1*/,
+                 const real & /*worldX2*/, const real & /*worldX3*/, const real & /*time*/ = 0) override
     {
         bc->setBcAlgorithmType(algorithmType);
     }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp
index d82a7865b1dc4542025b896914a5320495024bd6..f89c74513289ab2787cbef0dbe504d913510afc5 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp
@@ -54,10 +54,10 @@ void NoSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributions
 //////////////////////////////////////////////////////////////////////////
 void NoSlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     calcFeqFct(feq, rho, vx1, vx2, vx3);
 
@@ -65,8 +65,8 @@ void NoSlipBCAlgorithm::applyBC()
         if (bcPtr->hasNoSlipBoundaryFlag(fdir)) {
             // quadratic bounce back
             const int invDir = D3Q27System::INVDIR[fdir];
-            LBMReal q        = bcPtr->getQ(invDir);
-            LBMReal fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
+            real q        = bcPtr->getQ(invDir);
+            real fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
                               ((q / (1.0 + q)) * (f[invDir] + f[fdir]));
             distributions->setDistributionForDirection(fReturn, x1 + D3Q27System::DX1[invDir],
                                                        x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir],
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp
index cf7a627b0c649aa0e1d00a8b137225b9e65b8476..2787d685cbd3b71b879c3fabc1c114e0e61eef8d 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp
@@ -56,40 +56,42 @@ void NonEqDensityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distrib
 //////////////////////////////////////////////////////////////////////////
 void NonEqDensityBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
+    using namespace vf::lbm::dir;
+
+    real f[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
     int nx1 = x1;
     int nx2 = x2;
     int nx3 = x3;
 
     // flag points in direction of fluid
-    if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_P00)) {
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
         nx1 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_M00)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
         nx1 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0P0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
         nx2 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0M0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
         nx2 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00P)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
         nx3 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00M)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
         nx3 += 1;
     } else
         return; // UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
 
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     // LBMReal vlimit=0.01;
     // vx1=(fabs(vx1)>vlimit) ? vx1/fabs(vx1)*vlimit : vx1;
     // vx2=(fabs(vx2)>vlimit) ? vx2/fabs(vx2)*vlimit : vx2;
     // vx3=(fabs(vx3)>vlimit) ? vx3/fabs(vx3)*vlimit : vx3;
-    LBMReal rhoBC = bcPtr->getBoundaryDensity();
+    real rhoBC = bcPtr->getBoundaryDensity();
     for (int fdir = D3Q27System::STARTF; fdir <= D3Q27System::ENDF; fdir++) {
         if (bcPtr->hasDensityBoundaryFlag(fdir)) {
             // Martins NEQ ADDON
             ////original: 15.2.2013:
-            LBMReal ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
+            real ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
             // rhoBC=(rho>rhoBC)? rhoBC : rho; //Limiter 08.08.2018
             ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3) + f[fdir] - ftemp;
             distributions->setDistributionForDirection(ftemp, nx1, nx2, nx3, fdir);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp
index 6fa4c7b5d85f4b1e5135f95b48f7d75a0cdbf3a4..efa587d128dbea3ed4403098b4c50328186fddf0 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp
@@ -57,11 +57,14 @@ void NonReflectingOutflowBCAlgorithm::addDistributions(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void NonReflectingOutflowBCAlgorithm::applyBC()
 {
+    using namespace vf::lbm::dir;
+
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+    using namespace vf::lbm::constant;
 
-    LBMReal f[ENDF + 1];
-    LBMReal ftemp[ENDF + 1];
+    real f[ENDF + 1];
+    real ftemp[ENDF + 1];
 
     int nx1       = x1;
     int nx2       = x2;
@@ -93,20 +96,20 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
     distributions->getDistribution(f, x1, x2, x3);
     distributions->getDistribution(ftemp, nx1, nx2, nx3);
 
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
 
     switch (direction) {
         case DIR_P00:
-            f[DIR_P00]   = ftemp[DIR_P00] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P00];
-            f[DIR_PP0]  = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PP0];
-            f[DIR_PM0]  = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PM0];
-            f[DIR_P0P]  = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0P];
-            f[DIR_P0M]  = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0M];
-            f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPP];
-            f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMP];
-            f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPM];
-            f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMM];
+            f[DIR_P00]   = ftemp[DIR_P00] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_P00];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PP0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PM0];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_P0P];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_P0M];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PMP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PMM];
 
             distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
             distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -119,15 +122,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
             break;
         case DIR_M00:
-            f[DIR_M00]   = ftemp[DIR_M00] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M00];
-            f[DIR_MP0]  = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MP0];
-            f[DIR_MM0]  = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MM0];
-            f[DIR_M0P]  = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0P];
-            f[DIR_M0M]  = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0M];
-            f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPP];
-            f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMP];
-            f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPM];
-            f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMM];
+            f[DIR_M00]   = ftemp[DIR_M00] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_M00];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MP0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MM0];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_M0P];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_M0M];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MPP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MMP];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MPM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MMM];
 
             distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
             distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
@@ -140,15 +143,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
             break;
         case DIR_0P0:
-            f[DIR_0P0]   = ftemp[DIR_0P0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0P0];
-            f[DIR_PP0]  = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PP0];
-            f[DIR_MP0]  = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MP0];
-            f[DIR_0PP]  = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PP];
-            f[DIR_0PM]  = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PM];
-            f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPP];
-            f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPP];
-            f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPM];
-            f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPM];
+            f[DIR_0P0]   = ftemp[DIR_0P0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_0P0];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_PP0];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_MP0];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_0PP];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_0PM];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_MPP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_MPM];
 
             distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
             distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -161,15 +164,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
             break;
         case DIR_0M0:
-            f[DIR_0M0]   = ftemp[DIR_0M0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0M0];
-            f[DIR_PM0]  = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PM0];
-            f[DIR_MM0]  = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MM0];
-            f[DIR_0MP]  = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MP];
-            f[DIR_0MM]  = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MM];
-            f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMP];
-            f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMP];
-            f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMM];
-            f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMM];
+            f[DIR_0M0]   = ftemp[DIR_0M0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_0M0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_PM0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_MM0];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_0MP];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_0MM];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_MMP];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_MMM];
 
             distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
             distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
@@ -182,15 +185,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
             break;
         case DIR_00P:
-            f[DIR_00P]   = ftemp[DIR_00P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_00P];
-            f[DIR_P0P]  = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_P0P];
-            f[DIR_M0P]  = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_M0P];
-            f[DIR_0PP]  = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0PP];
-            f[DIR_0MP]  = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0MP];
-            f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PPP];
-            f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MPP];
-            f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PMP];
-            f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MMP];
+            f[DIR_00P]   = ftemp[DIR_00P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_00P];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_P0P];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_M0P];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_0PP];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_0MP];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_MPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_MMP];
 
             distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
             distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
@@ -203,15 +206,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
             break;
         case DIR_00M:
-            f[DIR_00M]   = ftemp[DIR_00M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_00M];
-            f[DIR_P0M]  = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_P0M];
-            f[DIR_M0M]  = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_M0M];
-            f[DIR_0PM]  = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0PM];
-            f[DIR_0MM]  = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0MM];
-            f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PPM];
-            f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MPM];
-            f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PMM];
-            f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MMM];
+            f[DIR_00M]   = ftemp[DIR_00M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_00M];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_P0M];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_M0M];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_0PM];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_0MM];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_MPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_MMM];
 
             distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
             distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h
index d4a99846b6da226bf8d1d09e66763db61a90d2b0..45c9c0c21dba308862c8d4a8c1c1827a4a07e7c7 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h
@@ -52,7 +52,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override 
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override // Bingham model: forwards all three arguments to Rheology::getBinghamCollFactor
    { 
       return Rheology::getBinghamCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
index 9673a009f75bccd71924985ec9a27187d9e1e12e..2837238c40ec02bffe7a8eccb4fedb5100846d55 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
@@ -52,7 +52,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override 
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override // Bingham model: forwards all three arguments to Rheology::getBinghamCollFactor
    { 
       return Rheology::getBinghamCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h
index 19220dbd57f9100e71dbf611cccad7fadf8fae1e..c9b76b563dd16044ee109acbdc8eff73cde95959 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h
@@ -51,7 +51,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override // Herschel-Bulkley model: forwards all three arguments to Rheology::getHerschelBulkleyCollFactor
    {
       return Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp
index 5d2ec04aac4280a141e6f3b2044c56c8eed842db..73bf54ad7eccbd42deb2454fa5d0a060cf6b5c15 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp
@@ -42,15 +42,15 @@ void RheologyNoSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distr
 //////////////////////////////////////////////////////////////////////////
 void RheologyNoSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF + 1];
-   LBMReal feq[D3Q27System::ENDF + 1];
+   real f[D3Q27System::ENDF + 1];
+   real feq[D3Q27System::ENDF + 1];
    distributions->getDistribution(f, x1, x2, x3);
-   LBMReal rho, vx1, vx2, vx3;
+   real rho, vx1, vx2, vx3;
    calcMacrosFct(f, rho, vx1, vx2, vx3);
    calcFeqFct(feq, rho, vx1, vx2, vx3);
 
-   LBMReal shearRate = D3Q27System::getShearRate(f, collFactor);
-   LBMReal collFactorF = getRheologyCollFactor(collFactor, shearRate, rho);
+   real shearRate = D3Q27System::getShearRate(f, collFactor);
+   real collFactorF = getRheologyCollFactor(collFactor, shearRate, rho);
 
    for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++)
    {
@@ -58,8 +58,8 @@ void RheologyNoSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fDir];
-         LBMReal q = bcPtr->getQ(invDir);
-         LBMReal fReturn =(f[invDir] + q * f[fDir] + q * collFactorF * (feq[invDir] - f[invDir] + feq[fDir] - f[fDir])) / (1.0 + q);
+         real q = bcPtr->getQ(invDir);
+         real fReturn =(f[invDir] + q * f[fDir] + q * collFactorF * (feq[invDir] - f[invDir] + feq[fDir] - f[fDir])) / (1.0 + q);
          distributions->setDistributionInvForDirection(fReturn, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], invDir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h
index 71be7caf0787edb38877d6c3bb0f891c095ead05..c8c38ad7fcf3e35378b1e5dd14938cdad230f185 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h
@@ -43,10 +43,10 @@ class RheologyNoSlipBCAlgorithm : public BCAlgorithm
 public:
    RheologyNoSlipBCAlgorithm() = default;
    ~RheologyNoSlipBCAlgorithm() = default;
-   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("LBMReal clone() - belongs in the derived class")); }
+   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("SPtr<BCAlgorithm> clone() - belongs in the derived class")); } // not clonable at this level; the derived class has to provide clone()
    void addDistributions(SPtr<DistributionArray3D> distributions) override;
    void applyBC() override;
 protected:
-   virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const = 0; // { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+   virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const = 0; // pure virtual: the derived rheology model supplies the collision factor used by applyBC()
 };
 #endif // RheologyNoSlipBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h
index 49a7df6be41f37dd4dc2ac7a67f8d8645aa70c15..a6a3a5a745f193d66f2d87303ab5df1fd62826eb 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h
@@ -51,7 +51,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override // NOTE(review): this Powell-Eyring class forwards to Rheology::getHerschelBulkleyCollFactor, not to a Powell-Eyring factor - confirm this is intended
    {
       return Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
index e2979d462d5ec1d166cac48c67c903cf280b5ff2..9f7881af1705fca4ef24402f0fed4dbcb701127b 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
@@ -51,15 +51,15 @@ void RheologyVelocityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> dis
 //////////////////////////////////////////////////////////////////////////
 void RheologyVelocityBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal rho, vx1, vx2, vx3, drho;
+   real rho, vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    calcFeqFct(feq, drho, vx1, vx2, vx3);
 
-    LBMReal shearRate = D3Q27System::getShearRate(f, collFactor);
-    LBMReal collFactorF = getRheologyCollFactor(collFactor, shearRate, drho);
+    real shearRate = D3Q27System::getShearRate(f, collFactor);
+    real collFactorF = getRheologyCollFactor(collFactor, shearRate, drho);
 
     rho = 1.0+drho*compressibleFactor;
 
@@ -68,9 +68,9 @@ void RheologyVelocityBCAlgorithm::applyBC()
       if (bcPtr->hasVelocityBoundaryFlag(fdir))
       {
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
-         LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-         LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactorF)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
+         real q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+         real velocity = bcPtr->getBoundaryVelocity(invDir);
+         real fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactorF)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h
index fc523c4619b8f9c804b2366b671db7475495e151..91ac9ec574b9252c4d2842b9134d4190878d9daf 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h
@@ -43,11 +43,11 @@ class RheologyVelocityBCAlgorithm : public BCAlgorithm
 public:
    RheologyVelocityBCAlgorithm();
    ~RheologyVelocityBCAlgorithm();
-   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("LBMReal clone() - belongs in the derived class")); }
+   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("SPtr<BCAlgorithm> clone() - belongs in the derived class")); } // not clonable at this level; the derived class has to provide clone()
    void addDistributions(SPtr<DistributionArray3D> distributions) override;
    void applyBC() override;
 protected:
-   virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const = 0; // { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+   virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const = 0; // pure virtual: the derived rheology model supplies the collision factor used by applyBC()
 };
 
 #endif // RheologyVelocityBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp
index 151e10be4987e27622ce25b86c91c320c0d24406..5dbe30e7b172e149dcfe812f5ba9297d6caaa3df 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp
@@ -58,17 +58,19 @@ void SimpleSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distribut
 //////////////////////////////////////////////////////////////////////////
 void SimpleSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
+    using namespace vf::lbm::dir;
+
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal vx1, vx2, vx3, drho, rho;
+   real vx1, vx2, vx3, drho, rho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    calcFeqFct(feq, drho, vx1, vx2, vx3);
 
    rho = 1.0 + drho * compressibleFactor;
 
    UbTupleFloat3 normale = bcPtr->getNormalVector();
-   LBMReal amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
+   real amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
 
    vx1 = vx1 - amp * val<1>(normale); //normale zeigt von struktur weg!
    vx2 = vx2 - amp * val<2>(normale); //normale zeigt von struktur weg!
@@ -80,38 +82,38 @@ void SimpleSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal velocity = 0.0;
+         real velocity = 0.0;
          switch (invDir)
          {
-         case D3Q27System::DIR_P00: velocity = (UbMath::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
-         case D3Q27System::DIR_M00: velocity = (UbMath::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
-         case D3Q27System::DIR_0P0: velocity = (UbMath::c4o9*(+vx2)); break;
-         case D3Q27System::DIR_0M0: velocity = (UbMath::c4o9*(-vx2)); break;
-         case D3Q27System::DIR_00P: velocity = (UbMath::c4o9*(+vx3)); break;
-         case D3Q27System::DIR_00M: velocity = (UbMath::c4o9*(-vx3)); break;
-         case D3Q27System::DIR_PP0: velocity = (UbMath::c1o9*(+vx1+vx2)); break;
-         case D3Q27System::DIR_MM0: velocity = (UbMath::c1o9*(-vx1-vx2)); break;
-         case D3Q27System::DIR_PM0: velocity = (UbMath::c1o9*(+vx1-vx2)); break;
-         case D3Q27System::DIR_MP0: velocity = (UbMath::c1o9*(-vx1+vx2)); break;
-         case D3Q27System::DIR_P0P: velocity = (UbMath::c1o9*(+vx1+vx3)); break;
-         case D3Q27System::DIR_M0M: velocity = (UbMath::c1o9*(-vx1-vx3)); break;
-         case D3Q27System::DIR_P0M: velocity = (UbMath::c1o9*(+vx1-vx3)); break;
-         case D3Q27System::DIR_M0P: velocity = (UbMath::c1o9*(-vx1+vx3)); break;
-         case D3Q27System::DIR_0PP: velocity = (UbMath::c1o9*(+vx2+vx3)); break;
-         case D3Q27System::DIR_0MM: velocity = (UbMath::c1o9*(-vx2-vx3)); break;
-         case D3Q27System::DIR_0PM: velocity = (UbMath::c1o9*(+vx2-vx3)); break;
-         case D3Q27System::DIR_0MP: velocity = (UbMath::c1o9*(-vx2+vx3)); break;
-         case D3Q27System::DIR_PPP: velocity = (UbMath::c1o36*(+vx1+vx2+vx3)); break;
-         case D3Q27System::DIR_MMM: velocity = (UbMath::c1o36*(-vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_PPM: velocity = (UbMath::c1o36*(+vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_MMP: velocity = (UbMath::c1o36*(-vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_PMP: velocity = (UbMath::c1o36*(+vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_MPM: velocity = (UbMath::c1o36*(-vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_PMM: velocity = (UbMath::c1o36*(+vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_MPP: velocity = (UbMath::c1o36*(-vx1+vx2+vx3)); break;
+         case DIR_P00: velocity = (vf::lbm::constant::c4o9*(+vx1)); break;      // (2/cs^2)(=6) * rho_0 (=1 for incompressible) * w_i * u * e_i, with cs = 1/sqrt(3)
+         case DIR_M00: velocity = (vf::lbm::constant::c4o9*(-vx1)); break;      // e.g. from Manfred's paper "MRT LB models in three dimensions" (2002)
+         case DIR_0P0: velocity = (vf::lbm::constant::c4o9*(+vx2)); break;
+         case DIR_0M0: velocity = (vf::lbm::constant::c4o9*(-vx2)); break;
+         case DIR_00P: velocity = (vf::lbm::constant::c4o9*(+vx3)); break;
+         case DIR_00M: velocity = (vf::lbm::constant::c4o9*(-vx3)); break;
+         case DIR_PP0: velocity = (vf::lbm::constant::c1o9*(+vx1+vx2)); break;
+         case DIR_MM0: velocity = (vf::lbm::constant::c1o9*(-vx1-vx2)); break;
+         case DIR_PM0: velocity = (vf::lbm::constant::c1o9*(+vx1-vx2)); break;
+         case DIR_MP0: velocity = (vf::lbm::constant::c1o9*(-vx1+vx2)); break;
+         case DIR_P0P: velocity = (vf::lbm::constant::c1o9*(+vx1+vx3)); break;
+         case DIR_M0M: velocity = (vf::lbm::constant::c1o9*(-vx1-vx3)); break;
+         case DIR_P0M: velocity = (vf::lbm::constant::c1o9*(+vx1-vx3)); break;
+         case DIR_M0P: velocity = (vf::lbm::constant::c1o9*(-vx1+vx3)); break;
+         case DIR_0PP: velocity = (vf::lbm::constant::c1o9*(+vx2+vx3)); break;
+         case DIR_0MM: velocity = (vf::lbm::constant::c1o9*(-vx2-vx3)); break;
+         case DIR_0PM: velocity = (vf::lbm::constant::c1o9*(+vx2-vx3)); break;
+         case DIR_0MP: velocity = (vf::lbm::constant::c1o9*(-vx2+vx3)); break;
+         case DIR_PPP: velocity = (vf::lbm::constant::c1o36*(+vx1+vx2+vx3)); break;
+         case DIR_MMM: velocity = (vf::lbm::constant::c1o36*(-vx1-vx2-vx3)); break;
+         case DIR_PPM: velocity = (vf::lbm::constant::c1o36*(+vx1+vx2-vx3)); break;
+         case DIR_MMP: velocity = (vf::lbm::constant::c1o36*(-vx1-vx2+vx3)); break;
+         case DIR_PMP: velocity = (vf::lbm::constant::c1o36*(+vx1-vx2+vx3)); break;
+         case DIR_MPM: velocity = (vf::lbm::constant::c1o36*(-vx1+vx2-vx3)); break;
+         case DIR_PMM: velocity = (vf::lbm::constant::c1o36*(+vx1-vx2-vx3)); break;
+         case DIR_MPP: velocity = (vf::lbm::constant::c1o36*(-vx1+vx2+vx3)); break;
          default: throw UbException(UB_EXARGS, "unknown error");
          }
-         LBMReal fReturn = f[invDir] - velocity * rho;
+         real fReturn = f[invDir] - velocity * rho;
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp
index 6529ea85184f5b2d86a977e64008437fe0401491..83badd723e2c9bda222abaccdb09fbc352bc46af 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp
@@ -58,10 +58,10 @@ void SimpleVelocityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distr
 //////////////////////////////////////////////////////////////////////////
 void SimpleVelocityBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal vx1, vx2, vx3, drho;
+   real vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    calcFeqFct(feq, drho, vx1, vx2, vx3);
 
@@ -70,8 +70,8 @@ void SimpleVelocityBCAlgorithm::applyBC()
       if (bcPtr->hasVelocityBoundaryFlag(fdir))
       {
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-         LBMReal fReturn = f[invDir] - velocity;
+         real velocity = bcPtr->getBoundaryVelocity(invDir);
+         real fReturn = f[invDir] - velocity;
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp
index 0dc2d5d66e639b3b46bc9fe12cec96eba6e6adac..8f8299850d2299f75903a42a2b59512e093d6172 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp
@@ -46,9 +46,11 @@
 //   return D3Q27SlipBCAdapterCreator::getInstance();
 //}
 //*==========================================================*/
-void SlipBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double & /*worldX1*/,
-                            const double & /*worldX2*/, const double & /*worldX3*/, const double & /*time*/)
+void SlipBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real & /*worldX1*/,
+                            const real & /*worldX2*/, const real & /*worldX3*/, const real & /*time*/)
 {
+    using namespace vf::lbm::dir;
+
     //////////////////////////////////////////////////////////////////////////
     //>>> nur workaround! -> Hendrick nach normalen berechnung aus qs fragen
 
@@ -56,17 +58,17 @@ void SlipBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryCond
     if (!geo)
         throw UbException(UB_EXARGS, "derzeit nur fuer Cubes valide");
 
-    if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_P00))
+    if (bc->hasSlipBoundaryFlag(DIR_P00))
         bc->setNormalVector(1.0, 0.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_M00))
+    else if (bc->hasSlipBoundaryFlag(DIR_M00))
         bc->setNormalVector(-1.0, 0.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_0P0))
+    else if (bc->hasSlipBoundaryFlag(DIR_0P0))
         bc->setNormalVector(0.0, 1.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_0M0))
+    else if (bc->hasSlipBoundaryFlag(DIR_0M0))
         bc->setNormalVector(0.0, -1.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_00P))
+    else if (bc->hasSlipBoundaryFlag(DIR_00P))
         bc->setNormalVector(0.0, 0.0, 1.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_00M))
+    else if (bc->hasSlipBoundaryFlag(DIR_00M))
         bc->setNormalVector(0.0, 0.0, -1.0);
 
     bc->setBcAlgorithmType(algorithmType);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h
index b0f6d87bf938480b6568dcb648d5e8541a94ef4e..5c2225e0a1212931805207da7bacf1a1a797e290 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h
@@ -66,18 +66,18 @@ public:
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- start
 
-    void init(const D3Q27Interactor *const &interactor, const double &timestep = 0) override {}
-    void update(const D3Q27Interactor *const &interactor, const double &timestep = 0) override {}
+    void init(const D3Q27Interactor *const &interactor, const real &timestep = 0) override {} // empty body: this adapter does nothing on init
+    void update(const D3Q27Interactor *const &interactor, const real &timestep = 0) override {} // empty body: this adapter does nothing on update
 
     void adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                             const double & /*worldX1*/, const double & /*worldX2*/, const double & /*worldX3*/,
-                             const double &q, const int &fdirection, const double & /*time*/ = 0) override
+                             const real & /*worldX1*/, const real & /*worldX2*/, const real & /*worldX3*/,
+                             const real &q, const int &fdirection, const real & /*time*/ = 0) override
     {
         bc->setSlipBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-        bc->setQ((float)q, fdirection);
+        bc->setQ(q, fdirection); // q is already const real&, so no cast is needed
     }
-    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                 const double &worldX2, const double &worldX3, const double &time = 0) override;
+    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                 const real &worldX2, const real &worldX3, const real &time = 0) override;
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- end
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp
index 5d9993c459b756dc1d8663907ee90bc0eabef51c..ec90ef01246a7ac2001e21ce3646981e61f68f96 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp
@@ -20,15 +20,17 @@ void SlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributions)
 //////////////////////////////////////////////////////////////////////////
 void SlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    using namespace vf::lbm::dir;
+
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3, drho;
+    real rho, vx1, vx2, vx3, drho;
     calcMacrosFct(f, drho, vx1, vx2, vx3);
     calcFeqFct(feq, drho, vx1, vx2, vx3);
 
     UbTupleFloat3 normale = bcPtr->getNormalVector();
-    LBMReal amp            = vx1 * val<1>(normale) + vx2 * val<2>(normale) + vx3 * val<3>(normale);
+    real amp            = vx1 * val<1>(normale) + vx2 * val<2>(normale) + vx3 * val<3>(normale);
 
     vx1 = vx1 - amp * val<1>(normale); // normale zeigt von struktur weg!
     vx2 = vx2 - amp * val<2>(normale); // normale zeigt von struktur weg!
@@ -42,40 +44,40 @@ void SlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+         real q = bcPtr->getQ(invDir);// m+m q=0 is more stable
          //vx3=0;
-         LBMReal velocity = 0.0;
+         real velocity = 0.0;
          switch (invDir)
          {
-         case D3Q27System::DIR_P00: velocity = (UbMath::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
-         case D3Q27System::DIR_M00: velocity = (UbMath::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
-         case D3Q27System::DIR_0P0: velocity = (UbMath::c4o9*(+vx2)); break;
-         case D3Q27System::DIR_0M0: velocity = (UbMath::c4o9*(-vx2)); break;
-         case D3Q27System::DIR_00P: velocity = (UbMath::c4o9*(+vx3)); break;
-         case D3Q27System::DIR_00M: velocity = (UbMath::c4o9*(-vx3)); break;
-         case D3Q27System::DIR_PP0: velocity = (UbMath::c1o9*(+vx1+vx2)); break;
-         case D3Q27System::DIR_MM0: velocity = (UbMath::c1o9*(-vx1-vx2)); break;
-         case D3Q27System::DIR_PM0: velocity = (UbMath::c1o9*(+vx1-vx2)); break;
-         case D3Q27System::DIR_MP0: velocity = (UbMath::c1o9*(-vx1+vx2)); break;
-         case D3Q27System::DIR_P0P: velocity = (UbMath::c1o9*(+vx1+vx3)); break;
-         case D3Q27System::DIR_M0M: velocity = (UbMath::c1o9*(-vx1-vx3)); break;
-         case D3Q27System::DIR_P0M: velocity = (UbMath::c1o9*(+vx1-vx3)); break;
-         case D3Q27System::DIR_M0P: velocity = (UbMath::c1o9*(-vx1+vx3)); break;
-         case D3Q27System::DIR_0PP: velocity = (UbMath::c1o9*(+vx2+vx3)); break;
-         case D3Q27System::DIR_0MM: velocity = (UbMath::c1o9*(-vx2-vx3)); break;
-         case D3Q27System::DIR_0PM: velocity = (UbMath::c1o9*(+vx2-vx3)); break;
-         case D3Q27System::DIR_0MP: velocity = (UbMath::c1o9*(-vx2+vx3)); break;
-         case D3Q27System::DIR_PPP: velocity = (UbMath::c1o36*(+vx1+vx2+vx3)); break;
-         case D3Q27System::DIR_MMM: velocity = (UbMath::c1o36*(-vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_PPM: velocity = (UbMath::c1o36*(+vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_MMP: velocity = (UbMath::c1o36*(-vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_PMP: velocity = (UbMath::c1o36*(+vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_MPM: velocity = (UbMath::c1o36*(-vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_PMM: velocity = (UbMath::c1o36*(+vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_MPP: velocity = (UbMath::c1o36*(-vx1+vx2+vx3)); break;
+         case DIR_P00: velocity = (vf::lbm::constant::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 for incompressible)*wi*u*ei with cs=1/sqrt(3)
+         case DIR_M00: velocity = (vf::lbm::constant::c4o9*(-vx1)); break;      //e.g. from Manfred's paper "MRT LB models in three dimensions" (2002)
+         case DIR_0P0: velocity = (vf::lbm::constant::c4o9*(+vx2)); break;
+         case DIR_0M0: velocity = (vf::lbm::constant::c4o9*(-vx2)); break;
+         case DIR_00P: velocity = (vf::lbm::constant::c4o9*(+vx3)); break;
+         case DIR_00M: velocity = (vf::lbm::constant::c4o9*(-vx3)); break;
+         case DIR_PP0: velocity = (vf::lbm::constant::c1o9*(+vx1+vx2)); break;
+         case DIR_MM0: velocity = (vf::lbm::constant::c1o9*(-vx1-vx2)); break;
+         case DIR_PM0: velocity = (vf::lbm::constant::c1o9*(+vx1-vx2)); break;
+         case DIR_MP0: velocity = (vf::lbm::constant::c1o9*(-vx1+vx2)); break;
+         case DIR_P0P: velocity = (vf::lbm::constant::c1o9*(+vx1+vx3)); break;
+         case DIR_M0M: velocity = (vf::lbm::constant::c1o9*(-vx1-vx3)); break;
+         case DIR_P0M: velocity = (vf::lbm::constant::c1o9*(+vx1-vx3)); break;
+         case DIR_M0P: velocity = (vf::lbm::constant::c1o9*(-vx1+vx3)); break;
+         case DIR_0PP: velocity = (vf::lbm::constant::c1o9*(+vx2+vx3)); break;
+         case DIR_0MM: velocity = (vf::lbm::constant::c1o9*(-vx2-vx3)); break;
+         case DIR_0PM: velocity = (vf::lbm::constant::c1o9*(+vx2-vx3)); break;
+         case DIR_0MP: velocity = (vf::lbm::constant::c1o9*(-vx2+vx3)); break;
+         case DIR_PPP: velocity = (vf::lbm::constant::c1o36*(+vx1+vx2+vx3)); break;
+         case DIR_MMM: velocity = (vf::lbm::constant::c1o36*(-vx1-vx2-vx3)); break;
+         case DIR_PPM: velocity = (vf::lbm::constant::c1o36*(+vx1+vx2-vx3)); break;
+         case DIR_MMP: velocity = (vf::lbm::constant::c1o36*(-vx1-vx2+vx3)); break;
+         case DIR_PMP: velocity = (vf::lbm::constant::c1o36*(+vx1-vx2+vx3)); break;
+         case DIR_MPM: velocity = (vf::lbm::constant::c1o36*(-vx1+vx2-vx3)); break;
+         case DIR_PMM: velocity = (vf::lbm::constant::c1o36*(+vx1-vx2-vx3)); break;
+         case DIR_MPP: velocity = (vf::lbm::constant::c1o36*(-vx1+vx2+vx3)); break;
          default: throw UbException(UB_EXARGS, "unknown error");
          }
-         LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
+         real fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp
index 10c10f14f6b2bd6f4f85d7fbe0c7d9d4650cbe73..b3c97393af0e21f5732ee2763c09f9fc60017862 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp
@@ -52,20 +52,20 @@ SPtr<BCAlgorithm> ThinWallNoSlipBCAlgorithm::clone()
 //////////////////////////////////////////////////////////////////////////
 void ThinWallNoSlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     calcFeqFct(feq, rho, vx1, vx2, vx3);
 
-    LBMReal fReturn;
+    real fReturn;
 
     for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
         if (bcPtr->hasNoSlipBoundaryFlag(fdir)) {
             const int invDir = D3Q27System::INVDIR[fdir];
             if (pass == 1) {
-                LBMReal q = bcPtr->getQ(invDir);
+                real q = bcPtr->getQ(invDir);
                 fReturn   = ((1.0 - q) / (1.0 + q)) * 0.5 *
                           (f[invDir] - f[fdir] +
                            (f[invDir] + f[fdir] - collFactor * (feq[fdir] + feq[invDir])) / (1.0 - collFactor));
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h
index e21c9b4fbb417242b0cc858afb26ddd16fffce18..f9995d49fae300b44e30df4b8e3f47cd7ac95929 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h
@@ -53,6 +53,6 @@ protected:
 
 private:
     int pass;
-    LBMReal fTemp[D3Q27System::ENDF + 1];
+    real fTemp[D3Q27System::ENDF + 1];
 };
 #endif // ThinWallNoSlipBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp
index bec8e139e333f5fa18847ddbb5fbb11c5c5c1eac..ebdf07f25ba489a87b637646271171bdc6de6d58 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp
@@ -72,21 +72,22 @@ void ThixotropyDensityBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> d
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyDensityBCAlgorithm::applyBC()
 {
-   using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+    using namespace D3Q27System;
 
-	LBMReal f[D3Q27System::ENDF + 1];
-	LBMReal feq[D3Q27System::ENDF + 1];
-	LBMReal h[D3Q27System::ENDF + 1];
-	LBMReal heq[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
+	real feq[D3Q27System::ENDF + 1];
+	real h[D3Q27System::ENDF + 1];
+	real heq[D3Q27System::ENDF + 1];
 	distributions->getDistributionInv(f, x1, x2, x3);
 	distributionsH->getDistributionInv(h, x1, x2, x3);
 	
-	LBMReal rho, vx1, vx2, vx3;
+	real rho, vx1, vx2, vx3;
 	
 	calcMacrosFct(f, rho, vx1, vx2, vx3);
 	calcFeqFct(feq, rho, vx1, vx2, vx3);
 
-	LBMReal lambda = D3Q27System::getDensity(h);
+	real lambda = D3Q27System::getDensity(h);
 	D3Q27System::calcCompFeq(heq, lambda, vx1, vx2, vx3);
 
 
@@ -95,25 +96,25 @@ void ThixotropyDensityBCAlgorithm::applyBC()
 	int nx3 = x3;
 
 	//flag points in direction of fluid
-	if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+	if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) { nx3 += 1; }
 	else	 UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
 
-	LBMReal rhoBC = bcPtr->getBoundaryDensity();
+	real rhoBC = bcPtr->getBoundaryDensity();
 
 	for (int fdir = D3Q27System::STARTF; fdir <= D3Q27System::ENDF; fdir++)
 	{
 		if (bcPtr->hasDensityBoundaryFlag(fdir))
 		{
-			LBMReal ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
+			real ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
 			ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3) + f[fdir] - ftemp;
 			distributions->setDistributionForDirection(ftemp, nx1, nx2, nx3, fdir);
 
-			LBMReal htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
+			real htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
 			htemp = D3Q27System::getCompFeqForDirection(fdir,lambdaBC, vx1, vx2, vx3) + h[fdir] - htemp;
 			distributionsH->setDistributionForDirection(htemp, nx1, nx2, nx3, fdir);
 		}
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h
index 0ed191335ac05eb0e246271f577b024cc11b8de9..2b83eed0ef9720b247751011e4d49d70df4b5e71 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h
@@ -47,12 +47,12 @@ public:
 	//void addDistributionsF(SPtr<DistributionArray3D> distributions);
 	void addDistributionsH(SPtr<DistributionArray3D> distributions);
 	void applyBC();
-	void setLambdaBC(LBMReal lambda) { this->lambdaBC = lambda; }
-	LBMReal getLambdaBC() { return this->lambdaBC; }
+	void setLambdaBC(real lambda) { this->lambdaBC = lambda; }
+	real getLambdaBC() { return this->lambdaBC; }
 protected:
 	SPtr<DistributionArray3D> distributionsH;
 private:
-	LBMReal lambdaBC;
+	real lambdaBC;
 };
 #endif // ThixotropyDensityBCAlgorithm_h__
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp
index 48b15fde31369a1857055263f9fc070ce9415a1b..e973a0091ea12db88e21052c3addc8fa4db8e995 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp
@@ -70,18 +70,18 @@ void ThixotropyNoSlipBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> di
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyNoSlipBCAlgorithm::applyBC()
 {
-	LBMReal f[D3Q27System::ENDF + 1];
-	LBMReal feq[D3Q27System::ENDF + 1];
-	LBMReal h[D3Q27System::ENDF + 1];
-	LBMReal heq[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
+	real feq[D3Q27System::ENDF + 1];
+	real h[D3Q27System::ENDF + 1];
+	real heq[D3Q27System::ENDF + 1];
 	distributions->getDistributionInv(f, x1, x2, x3);
 	distributionsH->getDistributionInv(h, x1, x2, x3);
-	LBMReal rho, vx1, vx2, vx3;//, concentration, fl1, fl2, fl3, m100;
+	real rho, vx1, vx2, vx3;//, concentration, fl1, fl2, fl3, m100;
 	calcMacrosFct(f, rho, vx1, vx2, vx3);
 	calcFeqFct(feq, rho, vx1, vx2, vx3);
 
 	//calcDiffusionMacrosFctPost(h, concentration, fl1, fl2, fl3, m100, collFactor);
-	LBMReal lambda = D3Q27System::getDensity(h);
+	real lambda = D3Q27System::getDensity(h);
 	D3Q27System::calcCompFeq(heq, lambda, 0., 0., 0.);
 
 	for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++)
@@ -90,9 +90,9 @@ void ThixotropyNoSlipBCAlgorithm::applyBC()
 		{
 			//quadratic bounce back
 			const int invDir = D3Q27System::INVDIR[fdir];
-			LBMReal q = bcPtr->getQ(invDir);
-			LBMReal fReturnf = ((1.0 - q) / (1.0 + q))*((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q / (1.0 + q))*(f[invDir] + f[fdir]));
-			LBMReal fReturnh = ((1.0 - q) / (1.0 + q))*((h[invDir] - heq[invDir]) / (1.0 - collFactor) + heq[invDir]) + ((q / (1.0 + q))*(h[invDir] + h[fdir]));
+			real q = bcPtr->getQ(invDir);
+			real fReturnf = ((1.0 - q) / (1.0 + q))*((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q / (1.0 + q))*(f[invDir] + f[fdir]));
+			real fReturnh = ((1.0 - q) / (1.0 + q))*((h[invDir] - heq[invDir]) / (1.0 - collFactor) + heq[invDir]) + ((q / (1.0 + q))*(h[invDir] + h[fdir]));
 
 			distributions->setDistributionForDirection(fReturnf, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 			distributionsH->setDistributionForDirection(fReturnh, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp
index ed90cc7596e186ab9984f25e2ba0ecdb625c9135..8124520338db470a080df868df39df0b8b9d66ed 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp
@@ -69,9 +69,11 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::addDistributionsH(SPtr<Distribut
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
 {
+   using namespace vf::lbm::dir;
    using namespace D3Q27System;
-   LBMReal f[ENDF + 1];
-   LBMReal ftemp[ENDF + 1];
+
+   real f[ENDF + 1];
+   real ftemp[ENDF + 1];
 
    int nx1 = x1;
    int nx2 = x2;
@@ -90,21 +92,21 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
    distributions->getDistribution(f, x1, x2, x3);
    distributions->getDistribution(ftemp, nx1, nx2, nx3);
 
-   LBMReal rho, vx1, vx2, vx3;
+   real rho, vx1, vx2, vx3;
    calcMacrosFct(f, rho, vx1, vx2, vx3);
 
    switch (direction)
    {
    case DIR_P00:
-      f[DIR_P00] = ftemp[DIR_P00] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P00];
-      f[DIR_PP0] = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PP0];
-      f[DIR_PM0] = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PM0];
-      f[DIR_P0P] = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0P];
-      f[DIR_P0M] = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0M];
-      f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPP];
-      f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMP];
-      f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPM];
-      f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMM];
+      f[DIR_P00] = ftemp[DIR_P00] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_P00];
+      f[DIR_PP0] = ftemp[DIR_PP0] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PP0];
+      f[DIR_PM0] = ftemp[DIR_PM0] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PM0];
+      f[DIR_P0P] = ftemp[DIR_P0P] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_P0P];
+      f[DIR_P0M] = ftemp[DIR_P0M] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_P0M];
+      f[DIR_PPP] = ftemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PPP];
+      f[DIR_PMP] = ftemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PMP];
+      f[DIR_PPM] = ftemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PPM];
+      f[DIR_PMM] = ftemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * f[DIR_PMM];
 
       distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
       distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -117,15 +119,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
       break;
    case DIR_M00:
-      f[DIR_M00] = ftemp[DIR_M00] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M00];
-      f[DIR_MP0] = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MP0];
-      f[DIR_MM0] = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MM0];
-      f[DIR_M0P] = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0P];
-      f[DIR_M0M] = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0M];
-      f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPP];
-      f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMP];
-      f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPM];
-      f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMM];
+      f[DIR_M00] = ftemp[DIR_M00] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_M00];
+      f[DIR_MP0] = ftemp[DIR_MP0] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MP0];
+      f[DIR_MM0] = ftemp[DIR_MM0] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MM0];
+      f[DIR_M0P] = ftemp[DIR_M0P] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_M0P];
+      f[DIR_M0M] = ftemp[DIR_M0M] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_M0M];
+      f[DIR_MPP] = ftemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MPP];
+      f[DIR_MMP] = ftemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MMP];
+      f[DIR_MPM] = ftemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MPM];
+      f[DIR_MMM] = ftemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * f[DIR_MMM];
 
       distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
       distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
@@ -138,15 +140,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_0P0:
-      f[DIR_0P0] = ftemp[DIR_0P0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0P0];
-      f[DIR_PP0] = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PP0];
-      f[DIR_MP0] = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MP0];
-      f[DIR_0PP] = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PP];
-      f[DIR_0PM] = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PM];
-      f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPP];
-      f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPP];
-      f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPM];
-      f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPM];
+      f[DIR_0P0] = ftemp[DIR_0P0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_0P0];
+      f[DIR_PP0] = ftemp[DIR_PP0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_PP0];
+      f[DIR_MP0] = ftemp[DIR_MP0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_MP0];
+      f[DIR_0PP] = ftemp[DIR_0PP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_0PP];
+      f[DIR_0PM] = ftemp[DIR_0PM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_0PM];
+      f[DIR_PPP] = ftemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_PPP];
+      f[DIR_MPP] = ftemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_MPP];
+      f[DIR_PPM] = ftemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_PPM];
+      f[DIR_MPM] = ftemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * f[DIR_MPM];
 
       distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
       distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -159,15 +161,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
       break;
    case DIR_0M0:
-      f[DIR_0M0] = ftemp[DIR_0M0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0M0];
-      f[DIR_PM0] = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PM0];
-      f[DIR_MM0] = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MM0];
-      f[DIR_0MP] = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MP];
-      f[DIR_0MM] = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MM];
-      f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMP];
-      f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMP];
-      f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMM];
-      f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMM];
+      f[DIR_0M0] = ftemp[DIR_0M0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_0M0];
+      f[DIR_PM0] = ftemp[DIR_PM0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_PM0];
+      f[DIR_MM0] = ftemp[DIR_MM0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_MM0];
+      f[DIR_0MP] = ftemp[DIR_0MP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_0MP];
+      f[DIR_0MM] = ftemp[DIR_0MM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_0MM];
+      f[DIR_PMP] = ftemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_PMP];
+      f[DIR_MMP] = ftemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_MMP];
+      f[DIR_PMM] = ftemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_PMM];
+      f[DIR_MMM] = ftemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * f[DIR_MMM];
 
       distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
       distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
@@ -180,15 +182,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_00P:
-      f[DIR_00P] = ftemp[DIR_00P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_00P];
-      f[DIR_P0P] = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_P0P];
-      f[DIR_M0P] = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_M0P];
-      f[DIR_0PP] = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0PP];
-      f[DIR_0MP] = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0MP];
-      f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PPP];
-      f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MPP];
-      f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PMP];
-      f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MMP];
+      f[DIR_00P] = ftemp[DIR_00P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_00P];
+      f[DIR_P0P] = ftemp[DIR_P0P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_P0P];
+      f[DIR_M0P] = ftemp[DIR_M0P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_M0P];
+      f[DIR_0PP] = ftemp[DIR_0PP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_0PP];
+      f[DIR_0MP] = ftemp[DIR_0MP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_0MP];
+      f[DIR_PPP] = ftemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_PPP];
+      f[DIR_MPP] = ftemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_MPP];
+      f[DIR_PMP] = ftemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_PMP];
+      f[DIR_MMP] = ftemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * f[DIR_MMP];
 
       distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
       distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
@@ -201,15 +203,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
       break;
    case DIR_00M:
-      f[DIR_00M] = ftemp[DIR_00M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_00M];
-      f[DIR_P0M] = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_P0M];
-      f[DIR_M0M] = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_M0M];
-      f[DIR_0PM] = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0PM];
-      f[DIR_0MM] = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0MM];
-      f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PPM];
-      f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MPM];
-      f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PMM];
-      f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MMM];
+      f[DIR_00M] = ftemp[DIR_00M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_00M];
+      f[DIR_P0M] = ftemp[DIR_P0M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_P0M];
+      f[DIR_M0M] = ftemp[DIR_M0M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_M0M];
+      f[DIR_0PM] = ftemp[DIR_0PM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_0PM];
+      f[DIR_0MM] = ftemp[DIR_0MM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_0MM];
+      f[DIR_PPM] = ftemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_PPM];
+      f[DIR_MPM] = ftemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_MPM];
+      f[DIR_PMM] = ftemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_PMM];
+      f[DIR_MMM] = ftemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * f[DIR_MMM];
 
       distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
       distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
@@ -224,8 +226,8 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
    default:
       UB_THROW(UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
    }
-   LBMReal h[D3Q27System::ENDF + 1];
-   LBMReal htemp[ENDF + 1];
+   real h[D3Q27System::ENDF + 1];
+   real htemp[ENDF + 1];
 
    distributionsH->getDistribution(h, x1, x2, x3);
    distributionsH->getDistribution(htemp, nx1, nx2, nx3);
@@ -240,15 +242,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
    switch (direction)
    {
    case DIR_P00:
-      h[DIR_P00]  = htemp[DIR_P00] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_P00];
-      h[DIR_PP0] = htemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PP0];
-      h[DIR_PM0] = htemp[DIR_PM0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PM0];
-      h[DIR_P0P] = htemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_P0P];
-      h[DIR_P0M] = htemp[DIR_P0M] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_P0M];
-      h[DIR_PPP] = htemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PPP];
-      h[DIR_PMP] = htemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PMP];
-      h[DIR_PPM] = htemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PPM];
-      h[DIR_PMM] = htemp[DIR_PMM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PMM];
+      h[DIR_P00]  = htemp[DIR_P00] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_P00];
+      h[DIR_PP0] = htemp[DIR_PP0] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_PP0];
+      h[DIR_PM0] = htemp[DIR_PM0] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_PM0];
+      h[DIR_P0P] = htemp[DIR_P0P] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_P0P];
+      h[DIR_P0M] = htemp[DIR_P0M] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_P0M];
+      h[DIR_PPP] = htemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_PPP];
+      h[DIR_PMP] = htemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_PMP];
+      h[DIR_PPM] = htemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_PPM];
+      h[DIR_PMM] = htemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 + vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx1) * h[DIR_PMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
       distributionsH->setDistributionInvForDirection(h[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -261,15 +263,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
       break;
    case DIR_M00:
-      h[DIR_M00] = htemp[DIR_M00] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_M00];
-      h[DIR_MP0] = htemp[DIR_MP0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MP0];
-      h[DIR_MM0] = htemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MM0];
-      h[DIR_M0P] = htemp[DIR_M0P] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_M0P];
-      h[DIR_M0M] = htemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_M0M];
-      h[DIR_MPP] = htemp[DIR_MPP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MPP];
-      h[DIR_MMP] = htemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MMP];
-      h[DIR_MPM] = htemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MPM];
-      h[DIR_MMM] = htemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MMM];
+      h[DIR_M00] = htemp[DIR_M00] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_M00];
+      h[DIR_MP0] = htemp[DIR_MP0] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_MP0];
+      h[DIR_MM0] = htemp[DIR_MM0] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_MM0];
+      h[DIR_M0P] = htemp[DIR_M0P] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_M0P];
+      h[DIR_M0M] = htemp[DIR_M0M] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_M0M];
+      h[DIR_MPP] = htemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_MPP];
+      h[DIR_MMP] = htemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_MMP];
+      h[DIR_MPM] = htemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_MPM];
+      h[DIR_MMM] = htemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx1) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx1) * h[DIR_MMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
       distributionsH->setDistributionInvForDirection(h[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
@@ -282,15 +284,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_0P0:
-      h[DIR_0P0] = htemp[DIR_0P0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_0P0];
-      h[DIR_PP0] = htemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_PP0];
-      h[DIR_MP0] = htemp[DIR_MP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_MP0];
-      h[DIR_0PP] = htemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_0PP];
-      h[DIR_0PM] = htemp[DIR_0PM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_0PM];
-      h[DIR_PPP] = htemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_PPP];
-      h[DIR_MPP] = htemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_MPP];
-      h[DIR_PPM] = htemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_PPM];
-      h[DIR_MPM] = htemp[DIR_MPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_MPM];
+      h[DIR_0P0] = htemp[DIR_0P0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_0P0];
+      h[DIR_PP0] = htemp[DIR_PP0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_PP0];
+      h[DIR_MP0] = htemp[DIR_MP0] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_MP0];
+      h[DIR_0PP] = htemp[DIR_0PP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_0PP];
+      h[DIR_0PM] = htemp[DIR_0PM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_0PM];
+      h[DIR_PPP] = htemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_PPP];
+      h[DIR_MPP] = htemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_MPP];
+      h[DIR_PPM] = htemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_PPM];
+      h[DIR_MPM] = htemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 + vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx2) * h[DIR_MPM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
       distributionsH->setDistributionInvForDirection(h[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -303,15 +305,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
       break;
    case DIR_0M0:
-      h[DIR_0M0] = htemp[DIR_0M0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_0M0];
-      h[DIR_PM0] = htemp[DIR_PM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_PM0];
-      h[DIR_MM0] = htemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_MM0];
-      h[DIR_0MP] = htemp[DIR_0MP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_0MP];
-      h[DIR_0MM] = htemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_0MM];
-      h[DIR_PMP] = htemp[DIR_PMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_PMP];
-      h[DIR_MMP] = htemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_MMP];
-      h[DIR_PMM] = htemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_PMM];
-      h[DIR_MMM] = htemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_MMM];
+      h[DIR_0M0] = htemp[DIR_0M0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_0M0];
+      h[DIR_PM0] = htemp[DIR_PM0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_PM0];
+      h[DIR_MM0] = htemp[DIR_MM0] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_MM0];
+      h[DIR_0MP] = htemp[DIR_0MP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_0MP];
+      h[DIR_0MM] = htemp[DIR_0MM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_0MM];
+      h[DIR_PMP] = htemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_PMP];
+      h[DIR_MMP] = htemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_MMP];
+      h[DIR_PMM] = htemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_PMM];
+      h[DIR_MMM] = htemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx2) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx2) * h[DIR_MMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
       distributionsH->setDistributionInvForDirection(h[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
@@ -324,15 +326,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_00P:
-      h[DIR_00P] = htemp[DIR_00P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_00P];
-      h[DIR_P0P] = htemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_P0P];
-      h[DIR_M0P] = htemp[DIR_M0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_M0P];
-      h[DIR_0PP] = htemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_0PP];
-      h[DIR_0MP] = htemp[DIR_0MP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_0MP];
-      h[DIR_PPP] = htemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_PPP];
-      h[DIR_MPP] = htemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_MPP];
-      h[DIR_PMP] = htemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_PMP];
-      h[DIR_MMP] = htemp[DIR_MMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_MMP];
+      h[DIR_00P] = htemp[DIR_00P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_00P];
+      h[DIR_P0P] = htemp[DIR_P0P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_P0P];
+      h[DIR_M0P] = htemp[DIR_M0P] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_M0P];
+      h[DIR_0PP] = htemp[DIR_0PP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_0PP];
+      h[DIR_0MP] = htemp[DIR_0MP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_0MP];
+      h[DIR_PPP] = htemp[DIR_PPP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_PPP];
+      h[DIR_MPP] = htemp[DIR_MPP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_MPP];
+      h[DIR_PMP] = htemp[DIR_PMP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_PMP];
+      h[DIR_MMP] = htemp[DIR_MMP] * (vf::lbm::constant::one_over_sqrt3 + vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 - vx3) * h[DIR_MMP];
 
       distributionsH->setDistributionInvForDirection(h[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
       distributionsH->setDistributionInvForDirection(h[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
@@ -345,15 +347,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
       break;
    case DIR_00M:
-      h[DIR_00M] = htemp[DIR_00M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_00M];
-      h[DIR_P0M] = htemp[DIR_P0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_P0M];
-      h[DIR_M0M] = htemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_M0M];
-      h[DIR_0PM] = htemp[DIR_0PM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_0PM];
-      h[DIR_0MM] = htemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_0MM];
-      h[DIR_PPM] = htemp[DIR_PPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_PPM];
-      h[DIR_MPM] = htemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_MPM];
-      h[DIR_PMM] = htemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_PMM];
-      h[DIR_MMM] = htemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_MMM];
+      h[DIR_00M] = htemp[DIR_00M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_00M];
+      h[DIR_P0M] = htemp[DIR_P0M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_P0M];
+      h[DIR_M0M] = htemp[DIR_M0M] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_M0M];
+      h[DIR_0PM] = htemp[DIR_0PM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_0PM];
+      h[DIR_0MM] = htemp[DIR_0MM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_0MM];
+      h[DIR_PPM] = htemp[DIR_PPM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_PPM];
+      h[DIR_MPM] = htemp[DIR_MPM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_MPM];
+      h[DIR_PMM] = htemp[DIR_PMM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_PMM];
+      h[DIR_MMM] = htemp[DIR_MMM] * (vf::lbm::constant::one_over_sqrt3 - vx3) + (1.0 - vf::lbm::constant::one_over_sqrt3 + vx3) * h[DIR_MMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
       distributionsH->setDistributionInvForDirection(h[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp
index 4748212417600b18615a938a6c7a2696ee00eb00..189f52fd3088392be0db5b144158ed97623beafe 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp
@@ -71,33 +71,35 @@ void ThixotropyVelocityBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyVelocityBCAlgorithm::applyBC()
 {
-	LBMReal f[D3Q27System::ENDF + 1];
-	LBMReal feq[D3Q27System::ENDF + 1];
-	LBMReal h[D3Q27System::ENDF + 1];
+	using namespace vf::lbm::dir;
+
+	real f[D3Q27System::ENDF + 1];
+	real feq[D3Q27System::ENDF + 1];
+	real h[D3Q27System::ENDF + 1];
 
 	distributions->getDistributionInv(f, x1, x2, x3);
 	distributionsH->getDistributionInv(h, x1, x2, x3);
 	
-	LBMReal rho, vx1, vx2, vx3, drho;
+	real rho, vx1, vx2, vx3, drho;
 	calcMacrosFct(f, drho, vx1, vx2, vx3);
 	calcFeqFct(feq, drho, vx1, vx2, vx3);
 
 	rho = 1.0 + drho * compressibleFactor;
 
 	//calcDiffusionMacrosFctPost(h, concentration, fl1, fl2, fl3, m100, collFactor);
-	LBMReal lambda = D3Q27System::getDensity(h);
+	real lambda = D3Q27System::getDensity(h);
 
 	int nx1 = x1;
 	int nx2 = x2;
 	int nx3 = x3;
 
 	//flag points in direction of fluid
-	if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+	if (bcPtr->hasVelocityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_00M)) { nx3 += 1; }
 	else	 UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on velocity boundary..."));
 
 	//lambdaBC = bcPtr->getBoundaryThixotropy();
@@ -123,12 +125,12 @@ void ThixotropyVelocityBCAlgorithm::applyBC()
 		if (bcPtr->hasVelocityBoundaryFlag(fdir))
 		{
 			const int invDir = D3Q27System::INVDIR[fdir];
-			LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
-			LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-			LBMReal fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q * (f[invDir] + f[fdir]) - velocity * rho) / (1.0 + q));
+			real q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+			real velocity = bcPtr->getBoundaryVelocity(invDir);
+			real fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q * (f[invDir] + f[fdir]) - velocity * rho) / (1.0 + q));
 			distributions->setDistributionForDirection(fReturn, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 
-			LBMReal htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
+			real htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
 			htemp = D3Q27System::getCompFeqForDirection(fdir, lambdaBC, vx1, vx2, vx3) + h[fdir] - htemp;
 			distributionsH->setDistributionForDirection(htemp, nx1, nx2, nx3, fdir);
 		}
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h
index 86147cb21a232d8558737f28cb023ff2394a6f0d..bea3b0a374a128f1de88b1ef3d6e75c0ad34190a 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h
@@ -45,12 +45,12 @@ public:
 	void addDistributions(SPtr<DistributionArray3D> distributions);
 	void addDistributionsH(SPtr<DistributionArray3D> distributions);
 	void applyBC();
-	void setLambdaBC(LBMReal lambda) { this->lambdaBC = lambda; }
-	LBMReal getLambdaBC() { return this->lambdaBC; }
+	void setLambdaBC(real lambda) { this->lambdaBC = lambda; }
+	real getLambdaBC() { return this->lambdaBC; }
 protected:
 	SPtr<DistributionArray3D> distributionsH;
 private:
-	LBMReal lambdaBC;
+	real lambdaBC;
 };
 #endif // ThixotropyVelocityBCAlgorithm_h__
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp
index 454b29bc459045b1f61746eeb7f5f5987f1762a7..6c2622fc7838381de8fa94b12a97d35146b78b43 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp
@@ -64,33 +64,35 @@ void ThixotropyVelocityWithDensityBCAlgorithm::addDistributionsH(SPtr<Distributi
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyVelocityWithDensityBCAlgorithm::applyBC()
 {
+    using namespace vf::lbm::dir;
+
    //velocity bc for non reflecting pressure bc
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
    
-   LBMReal h[D3Q27System::ENDF + 1];
+   real h[D3Q27System::ENDF + 1];
    distributionsH->getDistributionInv(h, x1, x2, x3);
 
-   LBMReal rho, vx1, vx2, vx3, drho;
+   real rho, vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    
    rho = 1.0+drho*compressibleFactor;
   
    ///////////////////////////////////////////////////////////////////
    // Rheology
-   LBMReal lambda = D3Q27System::getDensity(h);
+   real lambda = D3Q27System::getDensity(h);
 
    int nx1 = x1;
    int nx2 = x2;
    int nx3 = x3;
 
    //flag points in direction of fluid
-   if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+   if (bcPtr->hasVelocityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00M)) { nx3 += 1; }
    else	 UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on velocity boundary..."));
 
    for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++)
@@ -112,16 +114,16 @@ void ThixotropyVelocityWithDensityBCAlgorithm::applyBC()
          if (bcArray->isSolid(nX1,nX2,nX3))
          {
             const int invDir = D3Q27System::INVDIR[fdir];
-            LBMReal velocity = bcPtr->getBoundaryVelocity(fdir);
+            real velocity = bcPtr->getBoundaryVelocity(fdir);
 
-            LBMReal fReturn = (f[fdir] + f[invDir] - velocity*rho) / 2.0 - drho*D3Q27System::WEIGTH[invDir];
+            real fReturn = (f[fdir] + f[invDir] - velocity*rho) / 2.0 - drho*D3Q27System::WEIGTH[invDir];
             distributions->setDistributionForDirection(fReturn, nX1, nX2, nX3, invDir);
          }
       }
       
       if (bcPtr->hasVelocityBoundaryFlag(fdir))
       {
-         LBMReal htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
+         real htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
          htemp = D3Q27System::getCompFeqForDirection(fdir, lambdaBC, vx1, vx2, vx3) + h[fdir] - htemp;
          distributionsH->setDistributionForDirection(htemp, nx1, nx2, nx3, fdir);
       }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h
index c24b6c28e2f494ced4a85fe4d8b9d2f33125424a..d69e79a223e3db56cb6a37014bbf030183adf606 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h
@@ -50,11 +50,11 @@ public:
    void addDistributions(SPtr<DistributionArray3D> distributions);
    void addDistributionsH(SPtr<DistributionArray3D> distributions);
    void applyBC();
-   void setLambdaBC(LBMReal lambda) { this->lambdaBC = lambda; }
-   LBMReal getLambdaBC() { return this->lambdaBC; }
+   void setLambdaBC(real lambda) { this->lambdaBC = lambda; }
+   real getLambdaBC() { return this->lambdaBC; }
 protected:
    SPtr<DistributionArray3D> distributionsH;
 private:
-   LBMReal lambdaBC;
+   real lambdaBC;
 };
 #endif // ThixotropyVelocityWithDensityBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp
index 55d65f629b0311c8599b81b39a62e8be06f35090..6ed8affe2b907764b56ad9f186d7f8adba2b6867 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp
@@ -50,7 +50,7 @@ VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const boo
 }
 /*==========================================================*/
 VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function,
-                                     const double &startTime, const double &endTime)
+                                     const real &startTime, const real &endTime)
 {
     if (vx1)
         this->vx1BCs.emplace_back(function, startTime, endTime);
@@ -62,8 +62,8 @@ VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const boo
 }
 /*==========================================================*/
 VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function1,
-                                     const mu::Parser &function2, const mu::Parser &function3, const double &startTime,
-                                     const double &endTime)
+                                     const mu::Parser &function2, const mu::Parser &function3, const real &startTime,
+                                     const real &endTime)
 {
     if (vx1)
         this->vx1BCs.emplace_back(function1, startTime, endTime);
@@ -75,7 +75,7 @@ VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const boo
 }
 /*==========================================================*/
 VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const string &functionstring,
-                                     const double &startTime, const double &endTime)
+                                     const real &startTime, const real &endTime)
 {
     if (vx1)
         this->vx1BCs.emplace_back(functionstring, startTime, endTime);
@@ -117,9 +117,9 @@ VelocityBCAdapter::VelocityBCAdapter(const vector<BCFunction> &velVx1BCs, const
     this->init();
 }
 /*==========================================================*/
-VelocityBCAdapter::VelocityBCAdapter(const double &vx1, const double &vx1StartTime, const double &vx1EndTime,
-                                     const double &vx2, const double &vx2StartTime, const double &vx2EndTime,
-                                     const double &vx3, const double &vx3StartTime, const double &vx3EndTime)
+VelocityBCAdapter::VelocityBCAdapter(const real &vx1, const real &vx1StartTime, const real &vx1EndTime,
+                                     const real &vx2, const real &vx2StartTime, const real &vx2EndTime,
+                                     const real &vx3, const real &vx3StartTime, const real &vx3EndTime)
 {
     this->vx1BCs.emplace_back(vx1, vx1StartTime, vx1EndTime);
     this->vx2BCs.emplace_back(vx2, vx2StartTime, vx2EndTime);
@@ -127,9 +127,9 @@ VelocityBCAdapter::VelocityBCAdapter(const double &vx1, const double &vx1StartTi
     this->init();
 }
 /*==========================================================*/
-VelocityBCAdapter::VelocityBCAdapter(const string &vx1Function, const double &vx1StartTime, const double &vx1EndTime,
-                                     const string &vx2Function, const double &vx2StartTime, const double &vx2EndTime,
-                                     const string &vx3Function, const double &vx3StartTime, const double &vx3EndTime)
+VelocityBCAdapter::VelocityBCAdapter(const string &vx1Function, const real &vx1StartTime, const real &vx1EndTime,
+                                     const string &vx2Function, const real &vx2StartTime, const real &vx2EndTime,
+                                     const string &vx3Function, const real &vx3StartTime, const real &vx3EndTime)
 {
     if (vx1Function.size())
         this->vx1BCs.emplace_back(vx1Function, vx1StartTime, vx1EndTime);
@@ -140,9 +140,9 @@ VelocityBCAdapter::VelocityBCAdapter(const string &vx1Function, const double &vx
     this->init();
 }
 /*==========================================================*/
-void VelocityBCAdapter::setNewVelocities(const double &vx1, const double &vx1StartTime, const double &vx1EndTime,
-                                         const double &vx2, const double &vx2StartTime, const double &vx2EndTime,
-                                         const double &vx3, const double &vx3StartTime, const double &vx3EndTime)
+void VelocityBCAdapter::setNewVelocities(const real &vx1, const real &vx1StartTime, const real &vx1EndTime,
+                                         const real &vx2, const real &vx2StartTime, const real &vx2EndTime,
+                                         const real &vx3, const real &vx3StartTime, const real &vx3EndTime)
 {
     this->clear();
     this->vx1BCs.emplace_back(vx1, vx1StartTime, vx1EndTime);
@@ -198,13 +198,13 @@ void VelocityBCAdapter::init(std::vector<BCFunction> &vxBCs)
     }
 }
 /*==========================================================*/
-void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const double &time)
+void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const real &time)
 {
     this->timeStep       = time;
     this->tmpVx1Function = this->tmpVx2Function = this->tmpVx3Function = NULL;
 
     // aktuelle velocityfunction bestimmen
-    double maxEndtime = -Ub::inf;
+    real maxEndtime = -Ub::inf;
 
     for (size_t pos = 0; pos < vx1BCs.size(); ++pos) {
         if (UbMath::equal(vx1BCs[pos].getEndTime(), BCFunction::INFTIMEDEPENDENT))
@@ -214,8 +214,8 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
 
         if (UbMath::greaterEqual(this->timeStep, vx1BCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, vx1BCs[pos].getEndTime()) ||
-                UbMath::equal(vx1BCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(vx1BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(vx1BCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(vx1BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpVx1Function = &vx1BCs[pos].getFunction();
                 break;
             }
@@ -229,8 +229,8 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
 
         if (UbMath::greaterEqual(this->timeStep, vx2BCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, vx2BCs[pos].getEndTime()) ||
-                UbMath::equal(vx2BCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(vx2BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(vx2BCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(vx2BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpVx2Function = &vx2BCs[pos].getFunction();
                 break;
             }
@@ -244,8 +244,8 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
 
         if (UbMath::greaterEqual(this->timeStep, vx3BCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, vx3BCs[pos].getEndTime()) ||
-                UbMath::equal(vx3BCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(vx3BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(vx3BCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(vx3BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpVx3Function = &vx3BCs[pos].getFunction();
                 break;
             }
@@ -284,30 +284,30 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
                          << ", timedependent=" << boolalpha << this->isTimeDependent());
 }
 /*==========================================================*/
-void VelocityBCAdapter::update(const D3Q27Interactor *const &interactor, const double &time)
+void VelocityBCAdapter::update(const D3Q27Interactor *const &interactor, const real &time)
 {
     this->init(interactor, time);
 }
 /*==========================================================*/
 void VelocityBCAdapter::adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                            const double & /*worldX1*/, const double & /*worldX2*/,
-                                            const double & /*worldX3*/, const double &q, const int &fdirection,
-                                            const double & /*time*/)
+                                            const real & /*worldX1*/, const real & /*worldX2*/,
+                                            const real & /*worldX3*/, const real &q, const int &fdirection,
+                                            const real & /*time*/)
 {
     bc->setVelocityBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-    bc->setQ((float)q, fdirection);
+    bc->setQ((real)q, fdirection);
 }
 /*==========================================================*/
-void VelocityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                                const double &worldX2, const double &worldX3, const double &time)
+void VelocityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                                const real &worldX2, const real &worldX3, const real &time)
 {
     this->setNodeVelocity(interactor, bc, worldX1, worldX2, worldX3, time);
     bc->setBcAlgorithmType(algorithmType);
 }
 /*==========================================================*/
 void VelocityBCAdapter::setNodeVelocity(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                        const double &worldX1, const double &worldX2, const double &worldX3,
-                                        const double &timestep)
+                                        const real &worldX1, const real &worldX2, const real &worldX3,
+                                        const real &timestep)
 {
     // Geschwindigkeiten setzen
     try {
@@ -318,11 +318,11 @@ void VelocityBCAdapter::setNodeVelocity(const D3Q27Interactor & /*interactor*/,
         this->timeStep = timestep;
 
         if (tmpVx1Function)
-            bc->setBoundaryVelocityX1((LBMReal)tmpVx1Function->Eval());
+            bc->setBoundaryVelocityX1((real)tmpVx1Function->Eval());
         if (tmpVx2Function)
-            bc->setBoundaryVelocityX2((LBMReal)tmpVx2Function->Eval());
+            bc->setBoundaryVelocityX2((real)tmpVx2Function->Eval());
         if (tmpVx3Function)
-            bc->setBoundaryVelocityX3((LBMReal)tmpVx3Function->Eval());
+            bc->setBoundaryVelocityX3((real)tmpVx3Function->Eval());
     } catch (mu::Parser::exception_type &e) {
         stringstream error;
         error << "mu::parser exception occurs, message(" << e.GetMsg() << "), formula("
@@ -334,12 +334,12 @@ void VelocityBCAdapter::setNodeVelocity(const D3Q27Interactor & /*interactor*/,
     }
 }
 /*==========================================================*/
-UbTupleDouble3 VelocityBCAdapter::getVelocity(const double &x1, const double &x2, const double &x3,
-                                              const double &timeStep) const
+UbTupleDouble3 VelocityBCAdapter::getVelocity(const real &x1, const real &x2, const real &x3,
+                                              const real &timeStep) const
 {
-    double vx1     = 0.0;
-    double vx2     = 0.0;
-    double vx3     = 0.0;
+    real vx1     = 0.0;
+    real vx2     = 0.0;
+    real vx3     = 0.0;
     this->x1       = x1;
     this->x2       = x2;
     this->x3       = x3;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h
index c6f5039a3ea3b2612e765235c88d357a25f9a89c..c0596b8e234f581726243a8a95beb6eb71121be4 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h
@@ -88,14 +88,14 @@ public:
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const BCFunction &velVxBC);
 
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function,
-                      const double &startTime, const double &endTime);
+                      const real &startTime, const real &endTime);
 
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function1,
-                      const mu::Parser &function2, const mu::Parser &function3, const double &startTime,
-                      const double &endTime);
+                      const mu::Parser &function2, const mu::Parser &function3, const real &startTime,
+                      const real &endTime);
 
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const std::string &functionstring,
-                      const double &startTime, const double &endTime);
+                      const real &startTime, const real &endTime);
 
     VelocityBCAdapter(const BCFunction &velBC, bool x1Dir, bool x2Dir, bool x3Dir);
 
@@ -104,13 +104,13 @@ public:
     VelocityBCAdapter(const std::vector<BCFunction> &velVx1BCs, const std::vector<BCFunction> &velVx2BCs,
                       const std::vector<BCFunction> &velVx3BCs);
 
-    VelocityBCAdapter(const double &vx1, const double &vx1StartTime, const double &vx1EndTime, const double &vx2,
-                      const double &vx2StartTime, const double &vx2EndTime, const double &vx3,
-                      const double &vx3StartTime, const double &vx3EndTime);
+    VelocityBCAdapter(const real &vx1, const real &vx1StartTime, const real &vx1EndTime, const real &vx2,
+                      const real &vx2StartTime, const real &vx2EndTime, const real &vx3,
+                      const real &vx3StartTime, const real &vx3EndTime);
 
-    VelocityBCAdapter(const std::string &vx1Function, const double &vx1StartTime, const double &vx1EndTime,
-                      const std::string &vx2Function, const double &vx2StartTime, const double &vx2EndTime,
-                      const std::string &vx3Function, const double &vx3StartTime, const double &vx3EndTime);
+    VelocityBCAdapter(const std::string &vx1Function, const real &vx1StartTime, const real &vx1EndTime,
+                      const std::string &vx2Function, const real &vx2StartTime, const real &vx2EndTime,
+                      const std::string &vx3Function, const real &vx3StartTime, const real &vx3EndTime);
 
     // methods
     void setTimePeriodic() { (this->type |= TIMEPERIODIC); }
@@ -118,25 +118,25 @@ public:
     bool isTimePeriodic() { return ((this->type & TIMEPERIODIC) == TIMEPERIODIC); }
 
     // The following is meant for moving objects...
-    void setNewVelocities(const double &vx1, const double &vx1StartTime, const double &vx1EndTime, const double &vx2,
-                          const double &vx2StartTime, const double &vx2EndTime, const double &vx3,
-                          const double &vx3StartTime, const double &vx3EndTime);
+    void setNewVelocities(const real &vx1, const real &vx1StartTime, const real &vx1EndTime, const real &vx2,
+                          const real &vx2StartTime, const real &vx2EndTime, const real &vx3,
+                          const real &vx3StartTime, const real &vx3EndTime);
 
     //------------- implements BCAdapter ----- start
     std::string toString();
 
-    void init(const D3Q27Interactor *const &interactor, const double &time = 0) override;
-    void update(const D3Q27Interactor *const &interactor, const double &time = 0) override;
+    void init(const D3Q27Interactor *const &interactor, const real &time = 0) override;
+    void update(const D3Q27Interactor *const &interactor, const real &time = 0) override;
 
-    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                             const double &worldX2, const double &worldX3, const double &q, const int &fdirection,
-                             const double &time = 0) override;
-    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                 const double &worldX2, const double &worldX3, const double &time = 0) override;
+    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                             const real &worldX2, const real &worldX3, const real &q, const int &fdirection,
+                             const real &time = 0) override;
+    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                 const real &worldX2, const real &worldX3, const real &time = 0) override;
 
     //------------- implements BCAdapter ----- end
 
-    UbTupleDouble3 getVelocity(const double &x1, const double &x2, const double &x3, const double &timeStep) const;
+    UbTupleDouble3 getVelocity(const real &x1, const real &x2, const real &x3, const real &timeStep) const;
 
 protected:
     void init();
@@ -153,8 +153,8 @@ protected:
         vx3BCs.clear();
         this->init();
     }
-    void setNodeVelocity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                         const double &worldX2, const double &worldX3, const double &timestep);
+    void setNodeVelocity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                         const real &worldX2, const real &worldX3, const real &timestep);
 
 private:
     mutable mu::value_type x1, x2, x3;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
index 15768aeeb043620aece86194319eafe00ea1df60..9f1bf39d8d6d11747a71b04d9cfc7e3b70870802 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
@@ -55,10 +55,10 @@ void VelocityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributio
 //////////////////////////////////////////////////////////////////////////
 void VelocityBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3, drho;
+    real rho, vx1, vx2, vx3, drho;
     calcMacrosFct(f, drho, vx1, vx2, vx3);
     calcFeqFct(feq, drho, vx1, vx2, vx3);
 
@@ -70,9 +70,9 @@ void VelocityBCAlgorithm::applyBC()
     for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
         if (bcPtr->hasVelocityBoundaryFlag(fdir)) {
             const int invDir = D3Q27System::INVDIR[fdir];
-            LBMReal q        = bcPtr->getQ(invDir);
-            LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-            LBMReal fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
+            real q        = bcPtr->getQ(invDir);
+            real velocity = bcPtr->getBoundaryVelocity(invDir);
+            real fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
                               ((q * (f[invDir] + f[fdir]) - velocity * rho) / (1.0 + q));
             distributions->setDistributionForDirection(fReturn, x1 + D3Q27System::DX1[invDir],
                                                        x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir],
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp
index 1fe6632b9de8cb64d98c072bfccaa72ce4bb9ee8..09a52798e62013fdc5878678a4c47aec003b68a2 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp
@@ -56,10 +56,10 @@ void VelocityWithDensityBCAlgorithm::addDistributions(SPtr<DistributionArray3D>
 void VelocityWithDensityBCAlgorithm::applyBC()
 {
    //velocity bc for non reflecting pressure bc
-   LBMReal f[D3Q27System::ENDF+1];
-   //LBMReal feq[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
+   //real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal rho, vx1, vx2, vx3, drho;
+   real rho, vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    //calcFeqFct(feq, drho, vx1, vx2, vx3);
    
@@ -83,7 +83,7 @@ void VelocityWithDensityBCAlgorithm::applyBC()
             if (bcArray->isSolid(nX1, nX2, nX3)) {
                 const int invDir = D3Q27System::INVDIR[fdir];
                 //LBMReal q =1.0;// bcPtr->getQ(invDir);// m+m q=0 stabiler
-                LBMReal velocity = bcPtr->getBoundaryVelocity(fdir);
+                real velocity = bcPtr->getBoundaryVelocity(fdir);
                 
                 //LBMReal fReturn = ((1.0 - q) / (1.0 + q))*((f[fdir] - feq[fdir]*collFactor) / (1.0 -
                 //collFactor)) + ((q*(f[fdir] + f[invDir]) - velocity*rho) / (1.0 +
@@ -92,7 +92,7 @@ void VelocityWithDensityBCAlgorithm::applyBC()
                 // if q=1
                 // LBMReal fReturn = ((q*(f[fdir] + f[invDir]) - velocity*rho) / (1.0 +
                 // q))-drho*D3Q27System::WEIGTH[invDir];
-                LBMReal fReturn = (f[fdir] + f[invDir] - velocity * rho) / 2.0 - drho * D3Q27System::WEIGTH[invDir];
+                real fReturn = (f[fdir] + f[invDir] - velocity * rho) / 2.0 - drho * D3Q27System::WEIGTH[invDir];
 
                 distributions->setDistributionForDirection(fReturn, nX1, nX2, nX3, invDir);
             }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
index d02c249a62f60cdb91fbd4af9e975d39c6c4e29d..f3137e5f63dff257d1311eee8d75550f186b480e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
@@ -13,7 +13,7 @@
 #include <SetForcingBlockVisitor.h>
 
 AdjustForcingCoProcessor::AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                   SPtr<IntegrateValuesHelper> integrateValues, double vTarged,
+                                                   SPtr<IntegrateValuesHelper> integrateValues, real vTarged,
                                                    std::shared_ptr<vf::mpi::Communicator> comm)
 
     : CoProcessor(grid, s), path(path), integrateValues(integrateValues), comm(comm), vx1Targed(vTarged)
@@ -71,13 +71,13 @@ AdjustForcingCoProcessor::AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbSch
 }
 
 //////////////////////////////////////////////////////////////////////////
-void AdjustForcingCoProcessor::process(double step)
+void AdjustForcingCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void AdjustForcingCoProcessor::collectData(double step)
+void AdjustForcingCoProcessor::collectData(real step)
 {
     //////////////////////////////////////////////////////////////////////////////////////////////////
     // temporary solution
@@ -104,7 +104,7 @@ void AdjustForcingCoProcessor::collectData(double step)
 
     if (root) {
         cellsVolume = integrateValues->getCellsVolume();
-        double vx1  = integrateValues->getVx1();
+        real vx1  = integrateValues->getVx1();
         vx1Average  = (vx1 / cellsVolume);
 
         //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
index fbf75d066e626a3cf3d44c481138a9b1007b3107..be8dbc69f957521cff88cfbcc2b7260db05d6cdc 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
@@ -5,6 +5,7 @@
 #include <string>
 
 #include "CoProcessor.h"
+#include "lbm/constants/D3Q27.h"
 
 namespace vf::mpi {class Communicator;}
 class UbScheduler;
@@ -21,35 +22,35 @@ class AdjustForcingCoProcessor : public CoProcessor
 {
 public:
     AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                             SPtr<IntegrateValuesHelper> integrateValues, double vTarged, std::shared_ptr<vf::mpi::Communicator> comm);
+                             SPtr<IntegrateValuesHelper> integrateValues, real vTarged, std::shared_ptr<vf::mpi::Communicator> comm);
     //!< calls collect PostprocessData
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //!< object that can compute spacial average values in 3D-subdomain.
     SPtr<IntegrateValuesHelper> integrateValues;
     //!< compares velocity in integrateValues with target velocity and adjusts forcing accordingly.
-    void collectData(double step);
+    void collectData(real step);
     std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
-    double vx1Targed; //!< target velocity.
-    double forcing;   //!< forcing at previous update step.
-    double cellsVolume;
-    double vx1Average;
+    real vx1Targed; //!< target velocity.
+    real forcing;   //!< forcing at previous update step.
+    real cellsVolume;
+    real vx1Average;
     bool root;
-    double Kpcrit; // Kp critical
-    double Tcrit;  // the oscillation period
-    double Tn;
-    double Tv;
-    double e;
-    double Ta;
-    double Kp;
-    double Ki;
-    double Kd;
-    double y;
-    double esum;
-    double eold;
+    real Kpcrit; // Kp critical
+    real Tcrit;  // the oscillation period
+    real Tn;
+    real Tv;
+    real e;
+    real Ta;
+    real Kp;
+    real Ki;
+    real Kd;
+    real y;
+    real esum;
+    real eold;
     // std::vector<CalcNodes> cnodes;
     std::string path;
 };
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
index adce3f920ed36850ff711c10c7777a5035de027e..1a5276fcf44098254cee825af503a4752df60cd5 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
@@ -25,7 +25,7 @@ AverageValuesCoProcessor::AverageValuesCoProcessor(SPtr<Grid3D> grid, const std:
 {
     resetStepMeans  = (int)rsMeans->getMinBegin();
     resetStepRMS    = (int)rsRMS->getMinBegin();
-    averageInterval = (double)Avs->getMinStep();
+    averageInterval = (real)Avs->getMinStep();
 
     gridRank     = grid->getRank();
     minInitLevel = this->grid->getCoarsestInitializedLevel();
@@ -54,7 +54,7 @@ AverageValuesCoProcessor::AverageValuesCoProcessor(SPtr<Grid3D> grid, const std:
     // restartStep = 0.0;
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::process(double step)
+void AverageValuesCoProcessor::process(real step)
 {
     // resetRMS(step);
     if (resetSchedulerRMS->isDue(step))
@@ -76,7 +76,7 @@ void AverageValuesCoProcessor::process(double step)
     UBLOG(logDEBUG3, "AverageValuesCoProcessor::update:" << step);
 }
 
-void AverageValuesCoProcessor::resetDataRMS(double step)
+void AverageValuesCoProcessor::resetDataRMS(real step)
 {
     resetStepRMS = (int)step;
 
@@ -120,7 +120,7 @@ void AverageValuesCoProcessor::resetDataRMS(double step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::resetDataMeans(double step)
+void AverageValuesCoProcessor::resetDataMeans(real step)
 {
     resetStepMeans = (int)step;
 
@@ -161,7 +161,7 @@ void AverageValuesCoProcessor::resetDataMeans(double step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::collectData(double step)
+void AverageValuesCoProcessor::collectData(real step)
 {
     int istep = int(step);
 
@@ -219,7 +219,7 @@ void AverageValuesCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //	UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -272,20 +272,20 @@ void AverageValuesCoProcessor::addData(const SPtr<Block3D> block)
                                                 float(val<2>(org) - val<2>(nodeOffset) + ix2 * dx),
                                                 float(val<3>(org) - val<3>(nodeOffset) + ix3 * dx)));
 
-                    LBMReal vx = (*av)(AvVx, ix1, ix2, ix3);
-                    LBMReal vy = (*av)(AvVy, ix1, ix2, ix3);
-                    LBMReal vz = (*av)(AvVz, ix1, ix2, ix3);
+                    real vx = (*av)(AvVx, ix1, ix2, ix3);
+                    real vy = (*av)(AvVy, ix1, ix2, ix3);
+                    real vz = (*av)(AvVz, ix1, ix2, ix3);
 
-                    LBMReal vxx = (*av)(AvVxx, ix1, ix2, ix3);
-                    LBMReal vyy = (*av)(AvVyy, ix1, ix2, ix3);
-                    LBMReal vzz = (*av)(AvVzz, ix1, ix2, ix3);
+                    real vxx = (*av)(AvVxx, ix1, ix2, ix3);
+                    real vyy = (*av)(AvVyy, ix1, ix2, ix3);
+                    real vzz = (*av)(AvVzz, ix1, ix2, ix3);
 
-                    LBMReal vxy = (*av)(AvVxy, ix1, ix2, ix3);
-                    LBMReal vxz = (*av)(AvVxz, ix1, ix2, ix3);
-                    LBMReal vyz = (*av)(AvVyz, ix1, ix2, ix3);
+                    real vxy = (*av)(AvVxy, ix1, ix2, ix3);
+                    real vxz = (*av)(AvVxz, ix1, ix2, ix3);
+                    real vyz = (*av)(AvVyz, ix1, ix2, ix3);
 
-                    LBMReal vp    = (*av)(AvP, ix1, ix2, ix3);
-                    LBMReal vprms = (*av)(AvPrms, ix1, ix2, ix3);
+                    real vp    = (*av)(AvP, ix1, ix2, ix3);
+                    real vprms = (*av)(AvPrms, ix1, ix2, ix3);
 
                     data[index++].push_back(vx);
                     data[index++].push_back(vy);
@@ -330,7 +330,7 @@ void AverageValuesCoProcessor::addData(const SPtr<Block3D> block)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::calculateAverageValues(double timeStep)
+void AverageValuesCoProcessor::calculateAverageValues(real timeStep)
 {
     using namespace D3Q27System;
 
@@ -342,7 +342,7 @@ void AverageValuesCoProcessor::calculateAverageValues(double timeStep)
         calcMacros = &calcIncompMacroscopicValues;
     }
 
-    LBMReal f[27];
+    real f[27];
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         for (SPtr<Block3D> block : blockVector[level]) {
@@ -375,18 +375,18 @@ void AverageValuesCoProcessor::calculateAverageValues(double timeStep)
                                 //////////////////////////////////////////////////////////////////////////
                                 // compute velocity
                                 //////////////////////////////////////////////////////////////////////////
-                                LBMReal vx, vy, vz, rho;
+                                real vx, vy, vz, rho;
                                 calcMacros(f, rho, vx, vy, vz);
-                                double press = D3Q27System::calcPress(f, rho, vx, vy, vz);
+                                real press = D3Q27System::calcPress(f, rho, vx, vy, vz);
 
                                 //////////////////////////////////////////////////////////////////////////
                                 // compute average values
                                 //////////////////////////////////////////////////////////////////////////
 
-                                LBMReal timeStepAfterResetRMS =
-                                    (double)(timeStep - resetStepRMS) / ((double)averageInterval);
-                                LBMReal timeStepAfterResetMeans =
-                                    (double)(timeStep - resetStepMeans) / ((double)averageInterval);
+                                real timeStepAfterResetRMS =
+                                    (real)(timeStep - resetStepRMS) / ((real)averageInterval);
+                                real timeStepAfterResetMeans =
+                                    (real)(timeStep - resetStepMeans) / ((real)averageInterval);
 
                                 // mean velocity
                                 (*av)(AvVx, ix1, ix2, ix3) =
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h
index 5ba922824167c4e6a686a4bc46b0ccc2813dbae7..b45118adb5b9a18de0eabcc061fa18e11c042bdf 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h
@@ -29,21 +29,21 @@ public:
     AverageValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer, SPtr<UbScheduler> s,
                              SPtr<UbScheduler> Avs, SPtr<UbScheduler> rsMeans, SPtr<UbScheduler> rsRMS, bool restart);
     //! Make update
-    void process(double step) override;
+    void process(real step) override;
     //! Resets averaged velocity and RMS-values according to ResetSceduler
-    void reset(double step);
+    void reset(real step);
 
 protected:
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! Reset data
-    void resetDataRMS(double step);
-    void resetDataMeans(double step);
+    void resetDataRMS(real step);
+    void resetDataMeans(real step);
     //! prepare data
     void addData(const SPtr<Block3D> block);
     void clearData();
     //! Computes average and RMS values of macroscopic quantities
-    void calculateAverageValues(double timeStep);
+    void calculateAverageValues(real timeStep);
     ////! write .txt file spatial intergrated averaged value, fluctuation, porous features
     // void collectPlotDataZ(double step);
     ////! create txt file and write head line
@@ -53,14 +53,14 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     int minInitLevel; // min init level
     int maxInitLevel;
     int gridRank;
     int resetStepRMS;
     int resetStepMeans;
-    double averageInterval;
+    real averageInterval;
     std::string path;
     WbWriter *writer;
     bool restart, compressible;
@@ -83,7 +83,7 @@ private:
         AvPrms = 10
     };
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
index 4e75e6337a44d46586a62a74d2e592b7d0839c57..d50c58d5698a7faa0939fedd613075873d9e5363 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
@@ -14,7 +14,7 @@
 #include "UbScheduler.h"
 
 CalculateForcesCoProcessor::CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                       std::shared_ptr<vf::mpi::Communicator> comm, double v, double a)
+                                                       std::shared_ptr<vf::mpi::Communicator> comm, real v, real a)
     : CoProcessor(grid, s), path(path), comm(comm), v(v), a(a), forceX1global(0), forceX2global(0), forceX3global(0)
 {
     if (comm->getProcessID() == comm->getRoot()) {
@@ -57,7 +57,7 @@ CalculateForcesCoProcessor::CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<U
 //////////////////////////////////////////////////////////////////////////
 CalculateForcesCoProcessor::~CalculateForcesCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void CalculateForcesCoProcessor::process(double step)
+void CalculateForcesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -65,7 +65,7 @@ void CalculateForcesCoProcessor::process(double step)
     UBLOG(logDEBUG3, "D3Q27ForcesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void CalculateForcesCoProcessor::collectData(double step)
+void CalculateForcesCoProcessor::collectData(real step)
 {
     calculateForces();
 
@@ -109,9 +109,9 @@ void CalculateForcesCoProcessor::calculateForces()
 
     for (SPtr<D3Q27Interactor> interactor : interactors) {
         for (BcNodeIndicesMap::value_type t : interactor->getBcNodeIndicesMap()) {
-            double forceX1 = 0.0;
-            double forceX2 = 0.0;
-            double forceX3 = 0.0;
+            real forceX1 = 0.0;
+            real forceX2 = 0.0;
+            real forceX3 = 0.0;
 
             SPtr<Block3D> block                             = t.first;
             std::set<std::vector<int>> &transNodeIndicesSet = t.second;
@@ -151,8 +151,8 @@ void CalculateForcesCoProcessor::calculateForces()
             }
             // if we have got discretization with more level
             // deltaX is LBM deltaX and equal LBM deltaT
-            double deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
-            double deltaXquadrat = deltaX * deltaX;
+            real deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
+            real deltaXquadrat = deltaX * deltaX;
             forceX1 *= deltaXquadrat;
             forceX2 *= deltaXquadrat;
             forceX3 *= deltaXquadrat;
@@ -164,8 +164,8 @@ void CalculateForcesCoProcessor::calculateForces()
             forceX3global += forceX3;
         }
     }
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    std::vector<real> values;
+    std::vector<real> rvalues;
     values.push_back(forceX1global);
     values.push_back(forceX2global);
     values.push_back(forceX3global);
@@ -191,10 +191,10 @@ UbTupleDouble3 CalculateForcesCoProcessor::getForces(int x1, int x2, int x3, SPt
 
     if (bc) {
         // references to tuple "force"
-        double &forceX1 = val<1>(force);
-        double &forceX2 = val<2>(force);
-        double &forceX3 = val<3>(force);
-        double f, fnbr;
+        real &forceX1 = val<1>(force);
+        real &forceX2 = val<2>(force);
+        real &forceX3 = val<3>(force);
+        real f, fnbr;
 
         for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
             if (bc->hasNoSlipBoundaryFlag(fdir)) {
@@ -217,9 +217,9 @@ UbTupleDouble3 CalculateForcesCoProcessor::getForces(int x1, int x2, int x3, SPt
 //////////////////////////////////////////////////////////////////////////
 void CalculateForcesCoProcessor::calculateCoefficients()
 {
-    double F1 = forceX1global;
-    double F2 = forceX2global;
-    double F3 = forceX3global;
+    real F1 = forceX1global;
+    real F2 = forceX2global;
+    real F3 = forceX3global;
 
     // return 2*F/(rho*v*v*a);
     C1 = 2.0 * F1 / (v * v * a);
@@ -229,7 +229,7 @@ void CalculateForcesCoProcessor::calculateCoefficients()
 //////////////////////////////////////////////////////////////////////////
 void CalculateForcesCoProcessor::addInteractor(SPtr<D3Q27Interactor> interactor) { interactors.push_back(interactor); }
 //////////////////////////////////////////////////////////////////////////
-void CalculateForcesCoProcessor::write(std::ofstream *fileObject, double value, char *separator)
+void CalculateForcesCoProcessor::write(std::ofstream *fileObject, real value, char *separator)
 {
     (*fileObject).width(12);
     //(*fileObject).precision(2);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
index aa6bfd47799ed5d426550c756eccfff706709e9e..d282ae8ed10ae177ae78e4f559018d7ff0959be7 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
@@ -14,6 +14,7 @@
 
 #include "CoProcessor.h"
 #include "UbTuple.h"
+#include "lbm/constants/D3Q27.h"
 
 class ForceCalculator;
 namespace vf::mpi {class Communicator;}
@@ -30,31 +31,31 @@ public:
     //! \param v - velocity of fluid in LB units
     //! \param a - area of object in LB units
     CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm,
-                               double v, double a);
+                               real v, real a);
     ~CalculateForcesCoProcessor() override;
-    void process(double step) override;
+    void process(real step) override;
     void addInteractor(SPtr<D3Q27Interactor> interactor);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void calculateForces();
     UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions,
                              SPtr<BoundaryConditions> bc);
     void calculateCoefficients();
-    void write(std::ofstream *fileObject, double value, char *separator);
+    void write(std::ofstream *fileObject, real value, char *separator);
 
 private:
     std::string path;
     std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
-    double forceX1global;
-    double forceX2global;
-    double forceX3global;
-    double v; //!< is the speed of the object relative to the fluid
-    double a; //!< is the reference area
-    double C1;
-    double C2;
-    double C3;
+    real forceX1global;
+    real forceX2global;
+    real forceX3global;
+    real v; //!< is the speed of the object relative to the fluid
+    real a; //!< is the reference area
+    real C1;
+    real C2;
+    real C3;
 };
 
 #endif /* D3Q27ForcesCoProcessor_H */
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
index b2c7466f7cd6e7d5dd0aeb0baa152bfb6ced93ae..93b3854070c0b9f1f589e6d32f1872cc8521ca86 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
@@ -41,7 +41,7 @@ CalculateTorqueCoProcessor::~CalculateTorqueCoProcessor()
 
 }
 //////////////////////////////////////////////////////////////////////////
-void CalculateTorqueCoProcessor::process( double step )
+void CalculateTorqueCoProcessor::process( real step )
 {
    if(scheduler->isDue(step) )
       collectData(step);
@@ -49,7 +49,7 @@ void CalculateTorqueCoProcessor::process( double step )
    UBLOG(logDEBUG3, "D3Q27ForcesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void CalculateTorqueCoProcessor::collectData( double step )
+void CalculateTorqueCoProcessor::collectData( real step )
 {
    calculateForces();
 
@@ -84,20 +84,20 @@ void CalculateTorqueCoProcessor::calculateForces()
 
    for(SPtr<D3Q27Interactor> interactor : interactors)
    {
-      double x1Centre = interactor->getGbObject3D()->getX1Centroid();
-      double x2Centre = interactor->getGbObject3D()->getX2Centroid();
-      double x3Centre = interactor->getGbObject3D()->getX3Centroid();
+      real x1Centre = interactor->getGbObject3D()->getX1Centroid();
+      real x2Centre = interactor->getGbObject3D()->getX2Centroid();
+      real x3Centre = interactor->getGbObject3D()->getX3Centroid();
 
       for(BcNodeIndicesMap::value_type t : interactor->getBcNodeIndicesMap())
       {
-         double torqueX1 = 0.0;
-         double torqueX2 = 0.0;
-         double torqueX3 = 0.0;
+         real torqueX1 = 0.0;
+         real torqueX2 = 0.0;
+         real torqueX3 = 0.0;
 
          SPtr<Block3D> block = t.first;
          std::set< std::vector<int> >& transNodeIndicesSet = t.second;
 
-         double deltaX = grid->getDeltaX(block);
+         real deltaX = grid->getDeltaX(block);
 
          SPtr<ILBMKernel> kernel = block->getKernel();
 
@@ -126,14 +126,14 @@ void CalculateTorqueCoProcessor::calculateForces()
             {
                SPtr<BoundaryConditions> bc = bcArray->getBC(x1,x2,x3);
                UbTupleDouble3 forceVec     = getForces(x1,x2,x3,distributions,bc);
-               double Fx                   = val<1>(forceVec);
-               double Fy                   = val<2>(forceVec);
-               double Fz                   = val<3>(forceVec);
+               real Fx                   = val<1>(forceVec);
+               real Fy                   = val<2>(forceVec);
+               real Fz                   = val<3>(forceVec);
 
                Vector3D worldCoordinates = grid->getNodeCoordinates(block, x1, x2, x3);
-               double rx                 = (worldCoordinates[0] - x1Centre) / deltaX;
-               double ry                 = (worldCoordinates[1] - x2Centre) / deltaX;
-               double rz                 = (worldCoordinates[2] - x3Centre) / deltaX;
+               real rx                 = (worldCoordinates[0] - x1Centre) / deltaX;
+               real ry                 = (worldCoordinates[1] - x2Centre) / deltaX;
+               real rz                 = (worldCoordinates[2] - x3Centre) / deltaX;
 
                torqueX1 += ry * Fz - rz * Fy;
                torqueX2 += rz * Fx - rx * Fz;
@@ -148,8 +148,8 @@ void CalculateTorqueCoProcessor::calculateForces()
          torqueX3global += torqueX3;
       }
    }
-   std::vector<double> values;
-   std::vector<double> rvalues;
+   std::vector<real> values;
+   std::vector<real> rvalues;
    values.push_back(torqueX1global);
    values.push_back(torqueX2global);
    values.push_back(torqueX3global);
@@ -174,16 +174,16 @@ UbTupleDouble3 CalculateTorqueCoProcessor::getForces(int x1, int x2, int x3,  SP
 {
    UbTupleDouble3 force(0.0,0.0,0.0);
 
-   LBMReal fs[D3Q27System::ENDF + 1];
+   real fs[D3Q27System::ENDF + 1];
    distributions->getDistributionInv(fs, x1, x2, x3);
    
    if(bc)
    {
       //references to tuple "force"
-      double& forceX1 = val<1>(force);
-      double& forceX2 = val<2>(force);
-      double& forceX3 = val<3>(force);
-      double f,  fnbr;
+      real& forceX1 = val<1>(force);
+      real& forceX2 = val<2>(force);
+      real& forceX3 = val<3>(force);
+      real f,  fnbr;
 
       for(int fdir=D3Q27System::FSTARTDIR; fdir<=D3Q27System::FENDDIR; fdir++)
       {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
index e488b442b60b2f726747a521e51cad9d4bacdbe9..26686239bdca68a047c4f8c8f4c33f5a09f53bb5 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
@@ -30,19 +30,19 @@ public:
    //! Constructor
    CalculateTorqueCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
 	virtual ~CalculateTorqueCoProcessor();             
-	void process(double step); 
+	void process(real step); 
    void addInteractor(SPtr<D3Q27Interactor> interactor);
 protected:
-	void collectData(double step);
+	void collectData(real step);
    void calculateForces();
    UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc);
 private:
    std::string path;
    std::shared_ptr<vf::mpi::Communicator> comm;
    std::vector<SPtr<D3Q27Interactor> > interactors;
-   double torqueX1global;
-   double torqueX2global;
-   double torqueX3global;
+   real torqueX1global;
+   real torqueX2global;
+   real torqueX3global;
 };
 
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h
index 7d8efad7606b57bb24ac11740843b30d3678fcbb..7aae7505b02ed9248a31b2a009cdc75f09ecd73a 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h
@@ -35,6 +35,7 @@
 #define CoProcessor_H
 
 #include <PointerDefinitions.h>
+#include "lbm/constants/D3Q27.h"
 
 class Grid3D;
 class UbScheduler;
@@ -58,7 +59,7 @@ public:
     virtual ~CoProcessor();
     //! \brief Updates observer
     //! \param step is the actual time step
-    virtual void process(double step) = 0;
+    virtual void process(real step) = 0;
 
 protected:
     SPtr<Grid3D> grid;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
index 6dae1c7049ea3c0d779b31fff2e79104e034790f..0221bf38a599352728dbd42f37b78bc5fa6ff1ee 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
@@ -26,13 +26,13 @@ DecreaseViscosityCoProcessor::DecreaseViscosityCoProcessor(SPtr<Grid3D> grid, SP
 //////////////////////////////////////////////////////////////////////////
 DecreaseViscosityCoProcessor::~DecreaseViscosityCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void DecreaseViscosityCoProcessor::process(double step)
+void DecreaseViscosityCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         setViscosity(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void DecreaseViscosityCoProcessor::setViscosity(double step)
+void DecreaseViscosityCoProcessor::setViscosity(real step)
 {
 
     UBLOG(logDEBUG3, "DecreaseViscosityCoProcessor::update:" << step);
@@ -53,7 +53,7 @@ void DecreaseViscosityCoProcessor::setViscosity(double step)
         int istep      = static_cast<int>(step);
         this->timeStep = istep;
         nueFunc->DefineVar("t", &this->timeStep);
-        double nue = nueFunc->Eval();
+        real nue = nueFunc->Eval();
 
         for (int level = minInitLevel; level <= maxInitLevel; level++) {
             std::vector<SPtr<Block3D>> blockVector;
@@ -61,7 +61,7 @@ void DecreaseViscosityCoProcessor::setViscosity(double step)
             for (SPtr<Block3D> block : blockVector) {
                 SPtr<ILBMKernel> kernel = block->getKernel();
                 if (kernel) {
-                    LBMReal collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
+                    real collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
                     kernel->setCollisionFactor(collFactor);
                 }
             }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
index ca413ba2d5201d3043594f4a4b4803091bb51cc8..aaa8a3c0b8db4d9adb9b28c19ef993444e7106c0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
@@ -31,11 +31,11 @@ public:
     DecreaseViscosityCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc, std::shared_ptr<vf::mpi::Communicator> comm);
     ~DecreaseViscosityCoProcessor() override;
     //! calls collect PostprocessData.
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! resets the collision factor depending on the current timestep.
-    void setViscosity(double step);
+    void setViscosity(real step);
     std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
index 3195ea4dfc6a9be9cf49ef7e04bfe57bce6e70f2..fc181e266e0453a12a149a4c69083497617c61ea 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
@@ -22,7 +22,7 @@ EmergencyExitCoProcessor::EmergencyExitCoProcessor(SPtr<Grid3D> grid, SPtr<UbSch
 //////////////////////////////////////////////////////////////////////////
 EmergencyExitCoProcessor::~EmergencyExitCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void EmergencyExitCoProcessor::process(double step)
+void EmergencyExitCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -30,7 +30,7 @@ void EmergencyExitCoProcessor::process(double step)
     UBLOG(logDEBUG3, "EmergencyExitCoProcessor::update:" << step);
 }
 
-void EmergencyExitCoProcessor::collectData(double step)
+void EmergencyExitCoProcessor::collectData(real step)
 {
     if (readMetafile()) {
         rp->process((int)step);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
index 8894420c979eb6e7879c1788010d7e5d7e807eec..13eaa7832cf5815005d30910332a7d382f047186 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
@@ -25,10 +25,10 @@ public:
                              SPtr<MPIIORestartCoProcessor> rp, std::shared_ptr<vf::mpi::Communicator> comm);
     ~EmergencyExitCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void writeMetafile(int status);
     bool readMetafile();
     void checkMetafile();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
index cb2fd908ff78803e4262ae64e906b8dad3c14dff..918e3afbe8d454ac97707b8c79f927bca324cb52 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
@@ -20,20 +20,20 @@ ForceCalculator::~ForceCalculator() = default;
 Vector3D ForceCalculator::getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions,
                                     SPtr<BoundaryConditions> bc, const Vector3D &boundaryVelocity) const
 {
-    double forceX1 = 0;
-    double forceX2 = 0;
-    double forceX3 = 0;
+    real forceX1 = 0;
+    real forceX2 = 0;
+    real forceX3 = 0;
     if (bc) {
         for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
             if (bc->hasNoSlipBoundaryFlag(fdir) || bc->hasVelocityBoundaryFlag(fdir)) {
                 const int invDir  = D3Q27System::INVDIR[fdir];
-                const double f    = distributions->getDistributionInvForDirection(x1, x2, x3, invDir);
-                const double fnbr = distributions->getDistributionInvForDirection(
+                const real f    = distributions->getDistributionInvForDirection(x1, x2, x3, invDir);
+                const real fnbr = distributions->getDistributionInvForDirection(
                     x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 
-                double correction[3] = { 0.0, 0.0, 0.0 };
+                real correction[3] = { 0.0, 0.0, 0.0 };
                 if (bc->hasVelocityBoundaryFlag(fdir)) {
-                    const double forceTerm = f - fnbr;
+                    const real forceTerm = f - fnbr;
                     correction[0]          = forceTerm * boundaryVelocity[0];
                     correction[1]          = forceTerm * boundaryVelocity[1];
                     correction[2]          = forceTerm * boundaryVelocity[2];
@@ -60,9 +60,9 @@ void ForceCalculator::calculateForces(std::vector<SPtr<D3Q27Interactor>> interac
 
     for (const auto &interactor : interactors) {
         for (const auto &t : interactor->getBcNodeIndicesMap()) {
-            double forceX1 = 0.0;
-            double forceX2 = 0.0;
-            double forceX3 = 0.0;
+            real forceX1 = 0.0;
+            real forceX2 = 0.0;
+            real forceX3 = 0.0;
 
             SPtr<Block3D> block                     = t.first;
             SPtr<ILBMKernel> kernel                 = block->getKernel();
@@ -86,8 +86,8 @@ void ForceCalculator::calculateForces(std::vector<SPtr<D3Q27Interactor>> interac
             }
             // if we have got discretization with more level
             // deltaX is LBM deltaX and equal LBM deltaT
-            double deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
-            double deltaXquadrat = deltaX * deltaX;
+            real deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
+            real deltaXquadrat = deltaX * deltaX;
             forceX1 *= deltaXquadrat;
             forceX2 *= deltaXquadrat;
             forceX3 *= deltaXquadrat;
@@ -104,12 +104,12 @@ void ForceCalculator::calculateForces(std::vector<SPtr<D3Q27Interactor>> interac
 
 void ForceCalculator::gatherGlobalForces()
 {
-    std::vector<double>
+    std::vector<real>
         values; // intel compiler 17 dasn't support this { forceX1global , forceX2global, forceX3global };
     values.push_back(forceX1global);
     values.push_back(forceX2global);
     values.push_back(forceX3global);
-    std::vector<double> rvalues = comm->gather(values);
+    std::vector<real> rvalues = comm->gather(values);
 
     if (comm->isRoot()) {
         forceX1global = 0.0;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
index 7aeb514abe426020af59a936d5f8b8c184ea496f..03b00f3603c3e8aac25567b7f370e81b61d3ef76 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
@@ -4,7 +4,8 @@
  *  Created on: 25.10.2017
  *  Author: S. Peters
  */
-
+#include "lbm/constants/D3Q27.h"
+ 
 #ifndef ForceCalculator_H
 #define ForceCalculator_H
 
@@ -36,9 +37,9 @@ private:
 
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    double forceX1global;
-    double forceX2global;
-    double forceX3global;
+    real forceX1global;
+    real forceX2global;
+    real forceX3global;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
index 40a8011ca871965f4b389ce32559b847021d2fe2..741f1f340c4c02e9f5b08854e7205ab2aa5e507b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
@@ -42,7 +42,7 @@ InSituCatalystCoProcessor::InSituCatalystCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
 //////////////////////////////////////////////////////////////////////////
 InSituCatalystCoProcessor::~InSituCatalystCoProcessor() {}
 //////////////////////////////////////////////////////////////////////////
-void InSituCatalystCoProcessor::process(double step)
+void InSituCatalystCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -50,7 +50,7 @@ void InSituCatalystCoProcessor::process(double step)
     UBLOG(logDEBUG3, "InSituCatalystCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void InSituCatalystCoProcessor::collectData(double step)
+void InSituCatalystCoProcessor::collectData(real step)
 {
     unsigned int istep = static_cast<int>(step);
 
@@ -94,13 +94,13 @@ void InSituCatalystCoProcessor::addData(SPtr<Block3D> block)
     UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset   = grid->getNodeOffset(block);
-    double dx                   = grid->getDeltaX(block);
+    real dx                   = grid->getDeltaX(block);
 
     SPtr<LBMKernel> kernel                  = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     int minX1 = 0;
     int minX2 = 0;
@@ -122,7 +122,7 @@ void InSituCatalystCoProcessor::addData(SPtr<Block3D> block)
                 if (!bcArray->isUndefined(ix1, ix2, ix3) && !bcArray->isSolid(ix1, ix2, ix3)) {
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
-                    double press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
+                    real press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
 
                     if (UbMath::isNaN(rho) || UbMath::isInfinity(rho))
                         UB_THROW(UbException(
@@ -211,13 +211,13 @@ void InSituCatalystCoProcessor::addVTKGridData(SPtr<Block3D> block)
     UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset   = grid->getNodeOffset(block);
-    double dx                   = grid->getDeltaX(block);
+    real dx                   = grid->getDeltaX(block);
 
     SPtr<LBMKernel> kernel                  = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
@@ -251,15 +251,15 @@ void InSituCatalystCoProcessor::addVTKGridData(SPtr<Block3D> block)
     SPtr<BoundaryConditions> bcPtr;
     int nr = points->GetNumberOfPoints();
 
-    double x[3];
+    real x[3];
 
     for (size_t ix3 = minX3; ix3 <= maxX3; ix3++) {
         for (size_t ix2 = minX2; ix2 <= maxX2; ix2++) {
             for (size_t ix1 = minX1; ix1 <= maxX1; ix1++) {
                 if (!bcArray->isUndefined(ix1, ix2, ix3) && !bcArray->isSolid(ix1, ix2, ix3)) {
-                    x[0] = double(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
-                    x[1] = double(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
-                    x[2] = double(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
+                    x[0] = real(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
+                    x[1] = real(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
+                    x[2] = real(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
 
                     points->InsertPoint((vtkIdType)nr, x);
 
@@ -267,7 +267,7 @@ void InSituCatalystCoProcessor::addVTKGridData(SPtr<Block3D> block)
 
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
-                    double press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
+                    real press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
 
                     if (UbMath::isNaN(rho) || UbMath::isInfinity(rho))
                         UB_THROW(UbException(
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h
index 5fa6d3fd13529431c125b799bcbb7ea47dccf1ed..e1c9fb95c7ff420362c3dcbe4b7444902beb6ba1 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h
@@ -6,6 +6,7 @@
 #include <CoProcessor.h>
 #include <Grid3D.h>
 #include <LBMUnitConverter.h>
+#include "lbm/constants/D3Q27.h"
 
 #include <string>
 
@@ -24,10 +25,10 @@ public:
     InSituCatalystCoProcessor();
     InSituCatalystCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::string script);
     virtual ~InSituCatalystCoProcessor();
-    void process(double step);
+    void process(real step);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addData(SPtr<Block3D> block);
     void buildVTKGrid();
     void addVTKGridData(SPtr<Block3D> block);
@@ -41,14 +42,14 @@ private:
     vtkSmartPointer<vtkUnstructuredGrid> unstructuredGrid;
     vtkSmartPointer<vtkPoints> points;
     vtkSmartPointer<vtkDoubleArray> arrays[4];
-    std::vector<double> vx1Array;
-    std::vector<double> vx2Array;
-    std::vector<double> vx3Array;
-    std::vector<double> rhoArray;
+    std::vector<real> vx1Array;
+    std::vector<real> vx2Array;
+    std::vector<real> vx3Array;
+    std::vector<real> rhoArray;
     int index;
     int numOfPoints;
-    typedef void (*CalcMacrosFct)(const LBMReal *const & /*feq[27]*/, LBMReal & /*(d)rho*/, LBMReal & /*vx1*/,
-                                  LBMReal & /*vx2*/, LBMReal & /*vx3*/);
+    typedef void (*CalcMacrosFct)(const real *const & /*feq[27]*/, real & /*(d)rho*/, real & /*vx1*/,
+                                  real & /*vx2*/, real & /*vx3*/);
     CalcMacrosFct calcMacros;
 };
 #endif // InSituCatalystCoProcessor_h__
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
index ed258864a4a87b473ca276064abf60ad5910828d..6b8026cedd8331c02b52a06c86c0e5d7821d0aa2 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
@@ -57,7 +57,7 @@ InSituVTKCoProcessor::InSituVTKCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler>
 //////////////////////////////////////////////////////////////////////////
 InSituVTKCoProcessor::~InSituVTKCoProcessor() { comm->CloseConnection(); }
 //////////////////////////////////////////////////////////////////////////
-void InSituVTKCoProcessor::process(double step)
+void InSituVTKCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -65,7 +65,7 @@ void InSituVTKCoProcessor::process(double step)
     UBLOG(logDEBUG3, "InSituVTKCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void InSituVTKCoProcessor::collectData(double step)
+void InSituVTKCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -127,20 +127,20 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
     UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset   = grid->getNodeOffset(block);
-    double dx                   = grid->getDeltaX(block);
+    real dx                   = grid->getDeltaX(block);
 
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
 
     // Funktionszeiger
-    typedef void (*CalcMacrosFct)(const LBMReal *const & /*feq[27]*/, LBMReal & /*(d)rho*/, LBMReal & /*vx1*/,
-                                  LBMReal & /*vx2*/, LBMReal & /*vx3*/);
+    typedef void (*CalcMacrosFct)(const real *const & /*feq[27]*/, real & /*(d)rho*/, real & /*vx1*/,
+                                  real & /*vx2*/, real & /*vx3*/);
 
     CalcMacrosFct calcMacros = NULL;
 
@@ -175,7 +175,7 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
     SPtr<BoundaryConditions> bcPtr;
     int nr = points->GetNumberOfPoints();
 
-    double x[3];
+    real x[3];
 
     for (size_t ix3 = minX3; ix3 <= maxX3; ix3++) {
         for (size_t ix2 = minX2; ix2 <= maxX2; ix2++) {
@@ -183,9 +183,9 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
                 if (!bcArray->isUndefined(ix1, ix2, ix3) && !bcArray->isSolid(ix1, ix2, ix3)) {
                     int index = 0;
 
-                    x[0] = double(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
-                    x[1] = double(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
-                    x[2] = double(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
+                    x[0] = real(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
+                    x[1] = real(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
+                    x[2] = real(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
 
                     points->InsertPoint((vtkIdType)nr, x);
 
@@ -193,7 +193,7 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
 
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
-                    double press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
+                    real press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
 
                     if (UbMath::isNaN(rho) || UbMath::isInfinity(rho))
                         UB_THROW(UbException(
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h
index 6789509569cabbdc39319f20749d9e0091736158..9456bf298b4e6e4af1e07ae14a46c3e11259cbaa 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h
@@ -23,10 +23,10 @@ public:
     InSituVTKCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &configFile,
                          SPtr<LBMUnitConverter> conv);
     virtual ~InSituVTKCoProcessor();
-    void process(double step);
+    void process(real step);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addData(SPtr<Block3D> block);
     void readConfigFile(const std::string &configFile);
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
index 4e711bd7c03b1da262c427230dc1c357966e1681..0d8b7827b48fb2a5e6e16d13538f63209e03e244 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
@@ -10,8 +10,8 @@
 #include "LBMKernel.h"
 
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2,
-                                             double minX3, double maxX1, double maxX2, double maxX3)
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2,
+                                             real minX3, real maxX1, real maxX2, real maxX3)
     :
 
       grid(grid), comm(comm), sVx1(0.0), sVx2(0.0), sVx3(0.0), sRho(0.0), sCellVolume(0.0), numberOfFluidsNodes(0),
@@ -21,8 +21,8 @@ IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<
     init(-1);
 }
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2,
-                                             double minX3, double maxX1, double maxX2, double maxX3, int level)
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2,
+                                             real minX3, real maxX1, real maxX2, real maxX3, int level)
     :
 
       grid(grid), comm(comm), sVx1(0.0), sVx2(0.0), sVx3(0.0), sRho(0.0), sCellVolume(0.0), numberOfFluidsNodes(0),
@@ -38,7 +38,7 @@ void IntegrateValuesHelper::init(int level)
 {
     root = comm->isRoot();
 
-    double orgX1, orgX2, orgX3;
+    real orgX1, orgX2, orgX3;
     int gridRank = grid->getRank();
     int minInitLevel, maxInitLevel;
     if (level < 0) {
@@ -49,8 +49,8 @@ void IntegrateValuesHelper::init(int level)
         maxInitLevel = level;
     }
 
-    double numSolids = 0.0;
-    double numFluids = 0.0;
+    real numSolids = 0.0;
+    real numFluids = 0.0;
     for (int level_it = minInitLevel; level_it <= maxInitLevel; level_it++) {
         std::vector<SPtr<Block3D>> blockVector;
         grid->getBlocks(level_it, gridRank, blockVector);
@@ -68,9 +68,9 @@ void IntegrateValuesHelper::init(int level)
             SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
             int ghostLayerWitdh                     = kernel->getGhostLayerWidth();
             SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-            double internX1, internX2, internX3;
+            real internX1, internX2, internX3;
 
-            double dx               = grid->getDeltaX(block);
+            real dx               = grid->getDeltaX(block);
             UbTupleDouble3 orgDelta = grid->getNodeOffset(block);
 
             for (int ix3 = ghostLayerWitdh; ix3 < (int)distributions->getNX3() - ghostLayerWitdh; ix3++) {
@@ -94,8 +94,8 @@ void IntegrateValuesHelper::init(int level)
                 cnodes.push_back(cn);
         }
     }
-    std::vector<double> rvalues;
-    std::vector<double> values;
+    std::vector<real> rvalues;
+    std::vector<real> values;
     values.push_back(numSolids);
     values.push_back(numFluids);
     rvalues = comm->gather(values);
@@ -122,15 +122,15 @@ void IntegrateValuesHelper::calculateAV()
         SPtr<AverageValuesArray3D> averagedValues = kernel->getDataSet()->getAverageValues();
 
         for (UbTupleInt3 node : cn.nodes) {
-            double Avx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVx);
-            double Avy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVy);
-            double Avz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVz);
+            real Avx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVx);
+            real Avy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVy);
+            real Avz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVz);
 
-            double Avxx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxx);
-            double Avyy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVyy);
-            double Avzz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVzz);
+            real Avxx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxx);
+            real Avyy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVyy);
+            real Avzz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVzz);
 
-            double Avxz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxz);
+            real Avxz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxz);
             sAvVx1 += std::abs(Avx);
             sAvVx2 += std::abs(Avy);
             sAvVx3 += std::abs(Avz);
@@ -143,8 +143,8 @@ void IntegrateValuesHelper::calculateAV()
             numberOfFluidsNodes++;
         }
     }
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    std::vector<real> values;
+    std::vector<real> rvalues;
     values.push_back(sAvVx1);
     values.push_back(sAvVx2);
     values.push_back(sAvVx3);
@@ -172,20 +172,20 @@ void IntegrateValuesHelper::calculateAV()
 //////////////////////////////////////////////////////////////////////////
 void IntegrateValuesHelper::calculateMQ()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
     clearData();
 
     // Funktionszeiger
-    typedef void (*CalcMacrosFct)(const LBMReal *const & /*feq[27]*/, LBMReal & /*(d)rho*/, LBMReal & /*vx1*/,
-                                  LBMReal & /*vx2*/, LBMReal & /*vx3*/);
+    typedef void (*CalcMacrosFct)(const real *const & /*feq[27]*/, real & /*(d)rho*/, real & /*vx1*/,
+                                  real & /*vx2*/, real & /*vx3*/);
 
     CalcMacrosFct calcMacros = NULL;
 
     for (CalcNodes cn : cnodes) {
         SPtr<ILBMKernel> kernel = cn.block->getKernel();
-        LBMReal dx              = 1.0 / (LBMReal)(1 << cn.block->getLevel());
-        LBMReal cellVolume      = dx * dx * dx;
+        real dx              = 1.0 / (real)(1 << cn.block->getLevel());
+        real cellVolume      = dx * dx * dx;
 
         if (kernel->getCompressible()) {
             calcMacros = &D3Q27System::calcCompMacroscopicValues;
@@ -205,8 +205,8 @@ void IntegrateValuesHelper::calculateMQ()
             sCellVolume += cellVolume;
         }
     }
-    std::vector<double> values(5);
-    std::vector<double> rvalues;
+    std::vector<real> values(5);
+    std::vector<real> rvalues;
     values[0] = sRho;
     values[1] = sVx1;
     values[2] = sVx2;
@@ -247,9 +247,9 @@ void IntegrateValuesHelper::clearData()
     sTSx1x3 = 0.0;
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal IntegrateValuesHelper::getNumberOfFluidsNodes() { return this->numberOfFluidsNodes; }
+real IntegrateValuesHelper::getNumberOfFluidsNodes() { return this->numberOfFluidsNodes; }
 //////////////////////////////////////////////////////////////////////////
-LBMReal IntegrateValuesHelper::getNumberOfSolidNodes() { return this->numberOfSolidNodes; }
+real IntegrateValuesHelper::getNumberOfSolidNodes() { return this->numberOfSolidNodes; }
 //////////////////////////////////////////////////////////////////////////
 GbCuboid3DPtr IntegrateValuesHelper::getBoundingBox() { return this->boundingBox; }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
index d6c87dcfd604bc1f1ded813b04e6ee71829c0d27..c804d74628570c4592c6715b7f76cd450c90ecfb 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
@@ -36,33 +36,33 @@ public:
     };
 
 public:
-    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2, double minX3,
-                          double maxX1, double maxX2, double maxX3);
-    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2, double minX3,
-                          double maxX1, double maxX2, double maxX3, int level);
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2, real minX3,
+                          real maxX1, real maxX2, real maxX3);
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2, real minX3,
+                          real maxX1, real maxX2, real maxX3, int level);
     virtual ~IntegrateValuesHelper();
 
     void calculateMQ();
     void calculateAV();
     void clearData();
 
-    double getRho() { return sRho; }
-    double getVx1() { return sVx1; }
-    double getVx2() { return sVx2; }
-    double getVx3() { return sVx3; }
-    double getCellsVolume() { return sCellVolume; }
+    real getRho() { return sRho; }
+    real getVx1() { return sVx1; }
+    real getVx2() { return sVx2; }
+    real getVx3() { return sVx3; }
+    real getCellsVolume() { return sCellVolume; }
     //  LBMReal getVm() { return sVm; }
     // LBMReal getPress() {return sPress;}
-    double getAvVx1() { return sAvVx1; }
-    double getAvVx2() { return sAvVx2; }
-    double getAvVx3() { return sAvVx3; }
-    double getTSx1() { return sTSx1; }
-    double getTSx2() { return sTSx2; }
-    double getTSx3() { return sTSx3; }
-    double getTSx1x3() { return sTSx1x3; }
+    real getAvVx1() { return sAvVx1; }
+    real getAvVx2() { return sAvVx2; }
+    real getAvVx3() { return sAvVx3; }
+    real getTSx1() { return sTSx1; }
+    real getTSx2() { return sTSx2; }
+    real getTSx3() { return sTSx3; }
+    real getTSx1x3() { return sTSx1x3; }
 
-    LBMReal getNumberOfFluidsNodes();
-    LBMReal getNumberOfSolidNodes();
+    real getNumberOfFluidsNodes();
+    real getNumberOfSolidNodes();
     GbCuboid3DPtr getBoundingBox();
     std::vector<CalcNodes> getCNodes();
 
@@ -72,9 +72,9 @@ private:
 
     bool root;
     SPtr<Grid3D> grid;
-    double sVx1, sVx2, sVx3, sRho, sCellVolume; // sPress, sVm;
-    double numberOfFluidsNodes, numberOfSolidNodes;
-    double sAvVx1, sAvVx2, sAvVx3, sTSx1, sTSx2, sTSx3, sTSx1x3;
+    real sVx1, sVx2, sVx3, sRho, sCellVolume; // sPress, sVm;
+    double numberOfFluidsNodes, numberOfSolidNodes; // node counters are incremented with ++; a float-typed real would stop counting at 2^24
+    real sAvVx1, sAvVx2, sAvVx3, sTSx1, sTSx2, sTSx3, sTSx1x3;
     std::vector<CalcNodes> cnodes;
     GbCuboid3DPtr boundingBox;
     std::shared_ptr<vf::mpi::Communicator> comm;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
index 9e5fa087fccf6d1121052ece7673a406984d52c0..98d0188fc25def9ee613e85f98d9623b0dfdf702 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
@@ -23,12 +23,12 @@ LineTimeSeriesCoProcessor::LineTimeSeriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
     numOfProc = comm->getNumberOfProcesses();
     gridRank  = comm->getProcessID();
 
-    double dx = CoProcessor::grid->getDeltaX(level);
+    real dx = CoProcessor::grid->getDeltaX(level);
 
     SPtr<CoordinateTransformation3D> trafo = grid->getCoordinateTransformator();
-    double orgX1                           = trafo->getX1CoordinateOffset();
-    double orgX2                           = trafo->getX2CoordinateOffset();
-    double orgX3                           = trafo->getX3CoordinateOffset();
+    real orgX1                           = trafo->getX1CoordinateOffset();
+    real orgX2                           = trafo->getX2CoordinateOffset();
+    real orgX3                           = trafo->getX3CoordinateOffset();
 
     int x1min = (int)((line->getX1Minimum() - orgX1) / dx);
     int x1max = (int)((line->getX1Maximum() - orgX1) / dx);
@@ -61,7 +61,7 @@ LineTimeSeriesCoProcessor::LineTimeSeriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
     ix3 = x3min % val<3>(blockNx) + 1;
 }
 //////////////////////////////////////////////////////////////////////////
-void LineTimeSeriesCoProcessor::process(double step)
+void LineTimeSeriesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) {
         collectData();
@@ -74,12 +74,12 @@ void LineTimeSeriesCoProcessor::writeLine(const std::string &path)
 {
     std::vector<UbTupleFloat3> nodes(2);
     std::vector<UbTupleInt2> lines(1);
-    val<1>(nodes[0])            = (float)line->getX1Minimum();
-    val<2>(nodes[0])            = (float)line->getX2Minimum();
-    val<3>(nodes[0])            = (float)line->getX3Minimum();
-    val<1>(nodes[1])            = (float)line->getX1Maximum();
-    val<2>(nodes[1])            = (float)line->getX2Maximum();
-    val<3>(nodes[1])            = (float)line->getX3Maximum();
+    val<1>(nodes[0])            = (float)line->getX1Minimum(); // nodes holds UbTupleFloat3, so cast to the stored type
+    val<2>(nodes[0])            = (float)line->getX2Minimum();
+    val<3>(nodes[0])            = (float)line->getX3Minimum();
+    val<1>(nodes[1])            = (float)line->getX1Maximum();
+    val<2>(nodes[1])            = (float)line->getX2Maximum();
+    val<3>(nodes[1])            = (float)line->getX3Maximum();
     val<1>(lines[0])            = 0;
     val<1>(lines[0])            = 1;
     WbWriterVtkXmlASCII *writer = WbWriterVtkXmlASCII::getInstance();
@@ -88,13 +88,13 @@ void LineTimeSeriesCoProcessor::writeLine(const std::string &path)
 //////////////////////////////////////////////////////////////////////////
 void LineTimeSeriesCoProcessor::collectData()
 {
-    LBMReal f[27];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[27];
+    real vx1, vx2, vx3, rho;
     MPI_Status status;
-    std::vector<double> v1(length, 0);
-    std::vector<double> v2(length, 0);
-    std::vector<double> v3(length, 0);
-    std::vector<double> p(length, 0);
+    std::vector<real> v1(length, 0);
+    std::vector<real> v2(length, 0);
+    std::vector<real> v3(length, 0);
+    std::vector<real> p(length, 0);
     for (int x = 0; x < length; x += blocknx) {
         if (dir == X1) {
             blockix1 = x / blocknx;
@@ -137,10 +137,10 @@ void LineTimeSeriesCoProcessor::collectData()
 
     if (root) {
         for (int i = 1; i < numOfProc; i++) {
-            std::vector<double> v1temp(length, 0);
-            std::vector<double> v2temp(length, 0);
-            std::vector<double> v3temp(length, 0);
-            std::vector<double> ptemp(length, 0);
+            std::vector<double> v1temp(length, 0); // element type must match the MPI_DOUBLE datatype passed to MPI_Recv
+            std::vector<double> v2temp(length, 0);
+            std::vector<double> v3temp(length, 0);
+            std::vector<double> ptemp(length, 0);
             MPI_Recv(&v1temp[0], length, MPI_DOUBLE, i, 1, mpi_comm, &status);
             MPI_Recv(&v2temp[0], length, MPI_DOUBLE, i, 2, mpi_comm, &status);
             MPI_Recv(&v3temp[0], length, MPI_DOUBLE, i, 3, mpi_comm, &status);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
index 16061b0b259b9118a82f7f46abbb919250b5dfea..c510a9b46b265344242ab8e3c75560d370a53921 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
@@ -30,7 +30,7 @@ public:
                               int level, std::shared_ptr<vf::mpi::Communicator> comm);
     ~LineTimeSeriesCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
     void writeLine(const std::string &path);
 
 protected:
@@ -42,7 +42,7 @@ private:
     bool root;
     SPtr<GbLine3D> line;
     // function pointer
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
     int blocknx;
     int blockix1;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
index a16f32c7d9e0d83dff90a55bb139d4115285a196..e238ec8b0f4f342fd24c39ed1e60ab2ad1d137fc 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
@@ -203,8 +203,8 @@ void MPIIOCoProcessor::writeBlocks(int step)
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timer: keep double regardless of the real typedef
+    double finish {0.};
     MPI_Offset write_offset = (MPI_Offset)(size * sizeof(int));
 
     if (comm->isRoot()) {
@@ -242,8 +242,8 @@ void MPIIOCoProcessor::readBlocks(int step)
                            << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // assigned from MPI_Wtime(), which returns double
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
index edee5255ebdb14ed23cd3f53e4738a3fd8d58186..68759639d10d0032114bfc30ca0ed5d650f35a3f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
@@ -19,7 +19,7 @@ public:
     ~MPIIOCoProcessor() override;
 
     //! Each timestep writes the grid into the files
-    void process(double step) override = 0;
+    void process(real step) override = 0;
 
     //! Writes the blocks of the grid into the file cpBlocks.bin
     void writeBlocks(int step);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
index 34c247345d2d3cf1a9c527ab7a7b15e23f7ab5da..fd08df50a3f0cb9b85fd3255465170a4ef1681a9 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
@@ -40,7 +40,7 @@ MPIIOMigrationBECoProcessor::MPIIOMigrationBECoProcessor(SPtr<Grid3D> grid, SPtr
 //////////////////////////////////////////////////////////////////////////
 MPIIOMigrationBECoProcessor::~MPIIOMigrationBECoProcessor() { MPI_Type_free(&sendBlockIntType); }
 
-void MPIIOMigrationBECoProcessor::process(double step)
+void MPIIOMigrationBECoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) {
         if (comm->isRoot())
@@ -115,9 +115,9 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     int firstGlobalID {0};
-    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
-    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
-    std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
+    std::vector<real> doubleValuesArrayF; // real-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<real> doubleValuesArrayH1; // real-values (arrays of f's) in all blocks  H1distribution
+    std::vector<real> doubleValuesArrayH2; // real-values (arrays of f's) in all blocks  H2distribution
 
     if (comm->isRoot()) 
     {
@@ -132,9 +132,9 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     int doubleCountInBlock = 0;
     int ic                 = 0;
     SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF = 0, D3Q27EsoTwist3DSplittedVectorPtrH1 = 0, D3Q27EsoTwist3DSplittedVectorPtrH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
     
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -197,55 +197,55 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
                     dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
                     dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
                 if (phaseField3DPtr1)
                     arrPresence.isPhaseField1Present = true;
                 else
                     arrPresence.isPhaseField1Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
                 if (phaseField3DPtr2)
                     arrPresence.isPhaseField2Present = true;
                 else
                     arrPresence.isPhaseField2Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
                 if (pressureFieldPtr)
                     arrPresence.isPressureFieldPresent = true;
                 else
@@ -295,8 +295,8 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // assigned from MPI_Wtime(), which returns double
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -315,7 +315,7 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
 
     MPI_File_write_at(file_handler, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
     MPI_File_write_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -333,7 +333,7 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
         MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
         MPI_File_sync(file_handler);
@@ -348,7 +348,7 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
         MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
         MPI_File_sync(file_handler);
@@ -427,12 +427,12 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     }
 
     int firstGlobalID {0};
-    std::vector<double> doubleValuesArray; // double-values of the data array in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the data array in all blocks
     dataSetParam dataSetParamStr;
     bool firstBlock        = true;
     int doubleCountInBlock = 0;
     int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___Array;
 
     if (comm->isRoot()) 
     {
@@ -498,8 +498,8 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     MPI_Type_commit(&dataSetDoubleType);
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; keep full precision even when `real` is float
+    double finish {0.}; // same type as start so the elapsed-time difference is not truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -511,7 +511,7 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
 
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -546,12 +546,12 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     }
 
     int firstGlobalID {0};
-    std::vector<double> doubleValuesArray; // double-values of the data array in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the data array in all blocks
     dataSetParam dataSetParamStr;
     bool firstBlock        = true;
     int doubleCountInBlock = 0;
     int ic                 = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___Array;
 
     if (comm->isRoot()) 
     {
@@ -615,8 +615,8 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     MPI_Type_commit(&dataSetDoubleType);
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; keep full precision even when `real` is float
+    double finish {0.}; // same type as start so the elapsed-time difference is not truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -628,7 +628,7 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
 
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -706,16 +706,16 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
                     bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
                     bouCond->densityBoundaryFlags   = bcArr->bcvector[bc]->getDensityBoundary();
                     bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
-                    bouCond->bcVelocityX1           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX1();
-                    bouCond->bcVelocityX2           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX2();
-                    bouCond->bcVelocityX3           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX3();
-                    bouCond->bcDensity              = (float)bcArr->bcvector[bc]->getBoundaryDensity();
-                    bouCond->bcPhaseField           = (float)bcArr->bcvector[bc]->getBoundaryPhaseField();
-                    bouCond->nx1                    = (float)bcArr->bcvector[bc]->nx1;
-                    bouCond->nx2                    = (float)bcArr->bcvector[bc]->nx2;
-                    bouCond->nx3                    = (float)bcArr->bcvector[bc]->nx3;
+                    bouCond->bcVelocityX1           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX1();
+                    bouCond->bcVelocityX2           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX2();
+                    bouCond->bcVelocityX3           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX3();
+                    bouCond->bcDensity              = (real)bcArr->bcvector[bc]->getBoundaryDensity();
+                    bouCond->bcPhaseField           = (real)bcArr->bcvector[bc]->getBoundaryPhaseField();
+                    bouCond->nx1                    = (real)bcArr->bcvector[bc]->nx1;
+                    bouCond->nx2                    = (real)bcArr->bcvector[bc]->nx2;
+                    bouCond->nx3                    = (real)bcArr->bcvector[bc]->nx3;
                     for (int iq = 0; iq < 26; iq++)
-                        bouCond->q[iq] = (float)bcArr->bcvector[bc]->getQ(iq);
+                        bouCond->q[iq] = (real)bcArr->bcvector[bc]->getQ(iq);
                     bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
                 }
 
@@ -757,8 +757,8 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; keep full precision even when `real` is float
+    double finish {0.}; // same type as start so the elapsed-time difference is not truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -865,7 +865,7 @@ void MPIIOMigrationBECoProcessor::restart(int step)
 
 void MPIIOMigrationBECoProcessor::readBlocks(int step) { MPIIOCoProcessor::readBlocks(step); }
 
-void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<double> &pV, std::vector<double> *rawDataReceive)
+void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<real> &pV, std::vector<real> *rawDataReceive)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -878,7 +878,7 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
     int *blocksCounterSend = new int[size];
     int *blocksCounterRec  = new int[size];
 
-    std::vector<double> *rawDataSend = new std::vector<double>[size];
+    std::vector<real> *rawDataSend = new std::vector<real>[size];
     for (int r = 0; r < size; r++) 
     {
         rawDataSend[r].resize(0);
@@ -900,13 +900,13 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
         if (tempRank == rank) // no need to send data, the process already has it
         {
             blocksCounterRec[tempRank]++;
-            rawDataReceive[tempRank].push_back(double(indexB + ind));
+            rawDataReceive[tempRank].push_back(real(indexB + ind));
             rawDataReceive[tempRank].insert(rawDataReceive[tempRank].end(), pV.begin() + ind * size_t(doubleCountInBlock),
                                             pV.begin() + ind * size_t(doubleCountInBlock) + size_t(doubleCountInBlock));
         } else // we must send data to other processes
         {
             blocksCounterSend[tempRank]++;
-            rawDataSend[tempRank].push_back(double(indexB + ind));
+            rawDataSend[tempRank].push_back(real(indexB + ind));
             rawDataSend[tempRank].insert(rawDataSend[tempRank].end(), pV.begin() + ind * size_t(doubleCountInBlock),
                                          pV.begin() + ind * size_t(doubleCountInBlock) + size_t(doubleCountInBlock));
         }
@@ -1040,8 +1040,8 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
     int indexE = indexB + int(myBlocksCount); // the latest "my" block
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; keep full precision even when `real` is float
+    double finish {0.}; // same type as start so the elapsed-time difference is not truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1058,15 +1058,15 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
     size_t doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
         dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
         dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-    std::vector<double> doubleValuesArrayF(size_t(myBlocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
-    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
-    std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
+    std::vector<real> doubleValuesArrayF(size_t(myBlocksCount * doubleCountInBlock)); // real-values in all blocks  Fdistributions
+    std::vector<real> doubleValuesArrayH1; // real-values in all blocks  H1distributions
+    std::vector<real> doubleValuesArrayH2; // real-values in all blocks  H2distributions
 
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
     //--------------------------------- F ---------------------------------------------------------
-    MPI_Offset read_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double));
+    MPI_Offset read_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(indexB * doubleCountInBlock * sizeof(real));
     MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayF[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_close(&file_handler);
@@ -1083,7 +1083,7 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         multiPhase1 = true;
         doubleValuesArrayH1.resize(myBlocksCount * doubleCountInBlock);
 
-        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double)) ;
+        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(real)) ;
         MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
     }
     MPI_File_close(&file_handler);
@@ -1099,7 +1099,7 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         multiPhase2 = true;
         doubleValuesArrayH2.resize(myBlocksCount * doubleCountInBlock);
 
-        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double));
+        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(real));
         MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
     }
     MPI_File_close(&file_handler);
@@ -1114,13 +1114,13 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    std::vector<double>* rawDataReceiveF = new std::vector<double>[size];
+    std::vector<real>* rawDataReceiveF = new std::vector<real>[size];
     for (int r = 0; r < size; r++)
         rawDataReceiveF[r].resize(0);
     blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArrayF, rawDataReceiveF);
     
 
-    std::vector<double>* rawDataReceiveH1 = new std::vector<double>[size];
+    std::vector<real>* rawDataReceiveH1 = new std::vector<real>[size];
     if (multiPhase1)
     {
         for (int r = 0; r < size; r++)
@@ -1128,7 +1128,7 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArrayH1, rawDataReceiveH1);
     }
 
-    std::vector<double>* rawDataReceiveH2 = new std::vector<double>[size];
+    std::vector<real>* rawDataReceiveH2 = new std::vector<real>[size];
     if (multiPhase2)
     {
         for (int r = 0; r < size; r++)
@@ -1146,9 +1146,9 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
     
     //-------------------------------------- restore blocks ---------------------------------
     int blockID;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
-    std::vector<double> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+    std::vector<real> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<real> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    std::vector<real> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
 
     size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
     size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
@@ -1185,11 +1185,11 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             index += vectorSize3;
 
             SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
@@ -1199,11 +1199,11 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
             if (multiPhase1)
             {
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
                 dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1214,11 +1214,11 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
             if (multiPhase2)
             {
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                        new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                        new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
                 dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1231,9 +1231,9 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             this->lbmKernel->setBlock(block);
             this->lbmKernel->setNX(std::array<int, 3>{ {dataSetParamStr1.nx1, dataSetParamStr1.nx2, dataSetParamStr1.nx3}});
             SPtr<LBMKernel> kernel = this->lbmKernel->clone();
-            LBMReal collFactor = LBMSystem::calcCollisionFactor(this->nue, block->getLevel());
-            LBMReal collFactorL = LBMSystem::calcCollisionFactor(this->nuL, block->getLevel());
-            LBMReal collFactorG = LBMSystem::calcCollisionFactor(this->nuG, block->getLevel());
+            real collFactor = LBMSystem::calcCollisionFactor(this->nue, block->getLevel());
+            real collFactorL = LBMSystem::calcCollisionFactor(this->nuL, block->getLevel());
+            real collFactorG = LBMSystem::calcCollisionFactor(this->nuG, block->getLevel());
             kernel->setCollisionFactor(collFactor);
             kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
             kernel->setDeltaT(LBMSystem::getDeltaT(block->getLevel()));
@@ -1319,8 +1319,8 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; keep full precision even when `real` is float
+    double finish {0.}; // same type as start so the elapsed-time difference is not truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1348,12 +1348,12 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
     MPI_File_read_at(file_handler, (MPI_Offset)0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     size_t doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(myBlocksCount * doubleCountInBlock); // double-values in all blocks
+    std::vector<real> doubleValuesArray(myBlocksCount * doubleCountInBlock); // real-values in all blocks
 
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    MPI_Offset read_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(indexB) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset read_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(indexB) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
     MPI_File_read_at(file_handler, read_offset, &doubleValuesArray[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_close(&file_handler);
@@ -1367,7 +1367,7 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    std::vector<double> *rawDataReceive = new std::vector<double>[size];
+    std::vector<real> *rawDataReceive = new std::vector<real>[size];
     for (int r = 0; r < size; r++)
         rawDataReceive[r].resize(0);
 
@@ -1384,7 +1384,7 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
 
     //----------------------------- restore data ---------------------------------
     int blockID;
-    std::vector<double> vectorsOfValues;
+    std::vector<real> vectorsOfValues;
     size_t index;
     size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
@@ -1401,53 +1401,53 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
             index += nextVectorSize;
 
             // fill arrays
-            SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
-            SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
+            SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___4DArray;
+            SPtr<CbArray3D<real, IndexerX3X2X1>> ___3DArray;
 
             switch (arrType) 
             {
                 case AverageDensity:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
                     break;
                 case AverageVelocity:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
                     break;
                 case AverageFluktuations:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
                     break;
                 case AverageTriple:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
                     break;
                 case ShearStressVal:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
                     break;
                 case RelaxationFactor:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
                     break;
                 case PhaseField1:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setPhaseField(___3DArray);
                     break;
                 case PhaseField2:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
                     break;
                 case PressureField:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setPressureField(___3DArray);
                     break;
@@ -1480,8 +1480,8 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; keep full precision even when `real` is float
+    double finish {0.}; // same type as start so the elapsed-time difference is not truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1725,9 +1725,9 @@ void MPIIOMigrationBECoProcessor::setLBMKernel(SPtr<LBMKernel> kernel) { this->l
 //////////////////////////////////////////////////////////////////////////
 void MPIIOMigrationBECoProcessor::setBCProcessor(SPtr<BCProcessor> bcProcessor) { this->bcProcessor = bcProcessor; }
 //////////////////////////////////////////////////////////////////////////
-void MPIIOMigrationBECoProcessor::setNu(double nu) { this->nue = nu; }
+void MPIIOMigrationBECoProcessor::setNu(real nu) { this->nue = nu; } // stores nu in `nue`, which readDataSet() passes to LBMSystem::calcCollisionFactor
 
-void MPIIOMigrationBECoProcessor::setNuLG(double cfL, double cfG) { this->nuL = cfL;  this->nuG = cfG; }
+void MPIIOMigrationBECoProcessor::setNuLG(real cfL, real cfG) { this->nuL = cfL;  this->nuG = cfG; } // stores cfL/cfG in nuL/nuG, which readDataSet() passes to LBMSystem::calcCollisionFactor (collFactorL / collFactorG)
 
-void MPIIOMigrationBECoProcessor::setDensityRatio(double dr) { this->densityRatio = dr; }
+void MPIIOMigrationBECoProcessor::setDensityRatio(real dr) { this->densityRatio = dr; } // stores dr in the densityRatio member
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
index c60800ccd18e5ac523c5c85ea47219a96f8a69c5..d29ecca36d194cd0498b3347b9e59eefced8a475 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
@@ -36,7 +36,7 @@ public:
                                 std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIOMigrationBECoProcessor() override;
     //! Each timestep writes the grid into the files
-    void process(double step) override;
+    void process(real step) override;
     //! Reads the grid from the files before grid reconstruction
     void restart(int step);
     //! Writes the blocks of the grid into the file cpBlocks.bin
@@ -73,12 +73,12 @@ public:
     void setBCProcessor(SPtr<BCProcessor> bcProcessor);
     //! The function truncates the data files
     void clearAllFiles(int step);
-    void setNu(double nu);
-    void setNuLG(double cfL, double cfG);
-    void setDensityRatio(double dr);
+    void setNu(real nu);
+    void setNuLG(real cfL, real cfG);
+    void setDensityRatio(real dr);
 
-    void blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<double> &pV,
-                        std::vector<double> *rawDataReceive);
+    void blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<real> &pV,
+                        std::vector<real> *rawDataReceive);
 
 private:
     // MPI_Datatype gridParamType, block3dType;
@@ -92,10 +92,10 @@ private:
     SPtr<LBMKernel> lbmKernel;
     SPtr<BCProcessor> bcProcessor;
     SPtr<Grid3DVisitor> metisVisitor;
-    double nue;
-    double nuL;
-    double nuG;
-    double densityRatio;
+    real nue;
+    real nuL;
+    real nuG;
+    real densityRatio;
 
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
index 285d6c28ae92b3bad7fb6b1171f3a09a637e0729..4e3a84aa8a47051aee123181fc63741266dbacc0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
@@ -69,7 +69,7 @@ MPIIOMigrationCoProcessor::~MPIIOMigrationCoProcessor()
 }
 
 //////////////////////////////////////////////////////////////////////////
-void MPIIOMigrationCoProcessor::process(double step)
+void MPIIOMigrationCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) 
     {
@@ -133,9 +133,9 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     DataSetMigration *dataSetArray = new DataSetMigration[blocksCount];
-    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
-    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
-    std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
+    std::vector<real> doubleValuesArrayF; // real-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<real> doubleValuesArrayH1; // real-values (arrays of f's) in all blocks  H1distribution
+    std::vector<real> doubleValuesArrayH2; // real-values (arrays of f's) in all blocks  H2distribution
 
     if (comm->isRoot()) 
     {
@@ -151,9 +151,9 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
     size_t doubleCountInBlock = 0;
     int ic                    = 0;
     SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF = 0, D3Q27EsoTwist3DSplittedVectorPtrH1 = 0, D3Q27EsoTwist3DSplittedVectorPtrH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
 
     SPtr<LBMKernel> kernel;
 
@@ -230,55 +230,55 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
                     dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
                     dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
                 if (phaseField3DPtr1)
                     arrPresence.isPhaseField1Present = true;
                 else
                     arrPresence.isPhaseField1Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
                 if (phaseField3DPtr2)
                     arrPresence.isPhaseField2Present = true;
                 else
                     arrPresence.isPhaseField2Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
                 if (pressureFieldPtr)
                     arrPresence.isPressureFieldPresent = true;
                 else
@@ -328,8 +328,8 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; 'real' may be float and would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed time is not computed in reduced precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -352,7 +352,7 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
     MPI_File_write_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
     
     MPI_Offset write_offset;
-    size_t sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double);
+    size_t sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(real);
 
     for (int nb = 0; nb < blocksCount; nb++) 
     {
@@ -372,7 +372,7 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        sizeofOneDataSet = doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = doubleCountInBlock * sizeof(real);
 
         for (int nb = 0; nb < blocksCount; nb++) 
         {
@@ -392,7 +392,7 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        sizeofOneDataSet = doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = doubleCountInBlock * sizeof(real);
 
         for (int nb = 0; nb < blocksCount; nb++) 
         {
@@ -471,7 +471,7 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     }
 
     DataSetSmallMigration *dataSetSmallArray = new DataSetSmallMigration[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the AverageDensityArray in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot()) 
@@ -483,7 +483,7 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     bool firstBlock           = true;
     size_t doubleCountInBlock = 0;
     int ic                    = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -543,8 +543,8 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; 'real' may be float and would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed time is not computed in reduced precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -560,7 +560,7 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     MPI_Offset write_offset;
-    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
+    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(real);
 
     for (int nb = 0; nb < blocksCount; nb++) 
     {
@@ -601,7 +601,7 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     }
 
     DataSetSmallMigration *dataSetSmallArray = new DataSetSmallMigration[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+    std::vector<real> doubleValuesArray; // real-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot()) 
@@ -613,7 +613,7 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     bool firstBlock           = true;
     size_t doubleCountInBlock = 0;
     int ic                    = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -670,8 +670,8 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; 'real' may be float and would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed time is not computed in reduced precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -692,7 +692,7 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
-    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
+    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(real);
 
     MPI_Offset write_offset;
     for (int nb = 0; nb < blocksCount; nb++) 
@@ -778,16 +778,16 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
                     bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
                     bouCond->densityBoundaryFlags   = bcArr->bcvector[bc]->getDensityBoundary();
                     bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
-                    bouCond->bcVelocityX1           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX1();
-                    bouCond->bcVelocityX2           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX2();
-                    bouCond->bcVelocityX3           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX3();
-                    bouCond->bcDensity              = (float)bcArr->bcvector[bc]->getBoundaryDensity();
-                    bouCond->bcPhaseField           = (float)bcArr->bcvector[bc]->getBoundaryPhaseField();
-                    bouCond->nx1                    = (float)bcArr->bcvector[bc]->nx1;
-                    bouCond->nx2                    = (float)bcArr->bcvector[bc]->nx2;
-                    bouCond->nx3                    = (float)bcArr->bcvector[bc]->nx3;
+                    bouCond->bcVelocityX1           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX1();
+                    bouCond->bcVelocityX2           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX2();
+                    bouCond->bcVelocityX3           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX3();
+                    bouCond->bcDensity              = (real)bcArr->bcvector[bc]->getBoundaryDensity();
+                    bouCond->bcPhaseField           = (real)bcArr->bcvector[bc]->getBoundaryPhaseField();
+                    bouCond->nx1                    = (real)bcArr->bcvector[bc]->nx1;
+                    bouCond->nx2                    = (real)bcArr->bcvector[bc]->nx2;
+                    bouCond->nx3                    = (real)bcArr->bcvector[bc]->nx3;
                     for (int iq = 0; iq < 26; iq++)
-                        bouCond->q[iq] = (float)bcArr->bcvector[bc]->getQ(iq);
+                        bouCond->q[iq] = (real)bcArr->bcvector[bc]->getQ(iq);
                     bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
                 }
 
@@ -829,8 +829,8 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; 'real' may be float and would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed time is not computed in reduced precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -942,8 +942,8 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; 'real' may be float and would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed time is not computed in reduced precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -977,9 +977,9 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     size_t doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
         dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
         dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-    std::vector<double> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
-    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
-    std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
+    std::vector<real> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // real-values in all blocks  Fdistributions
+    std::vector<real> doubleValuesArrayH1; // real-values in all blocks  H1distributions
+    std::vector<real> doubleValuesArrayH2; // real-values in all blocks  H2distributions
 
     // define MPI_types depending on the block-specific information
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
@@ -987,7 +987,7 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
 
     size_t ic = 0;
     MPI_Offset read_offset;
-    size_t sizeofOneDataSet = size_t(sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double));
+    size_t sizeofOneDataSet = size_t(sizeof(DataSetMigration) + doubleCountInBlock * sizeof(real));
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -1016,7 +1016,7 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         multiPhase1 = true;
         doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
 
-        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(double));
+        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(real));
 
         for (int level = minInitLevel; level <= maxInitLevel; level++)
         {
@@ -1043,7 +1043,7 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         multiPhase2 = true;
         doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
 
-        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(double));
+        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(real));
 
         for (int level = minInitLevel; level <= maxInitLevel; level++)
         {
@@ -1069,9 +1069,9 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     }
 
     size_t index = 0;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
-    std::vector<double> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+    std::vector<real> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<real> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    std::vector<real> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
 
     size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
     size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
@@ -1101,22 +1101,22 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         index += vectorSize3;
  
         SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+            new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+            new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
             vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
         
         //----------------------------------------- H1 ----------------------------------------------------
        SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
        if (multiPhase1)
         {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                 vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1127,11 +1127,11 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
         if (multiPhase2)
         {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1228,8 +1228,8 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; 'real' may be float and would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed time is not computed in reduced precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1257,7 +1257,7 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
 
     DataSetSmallMigration *dataSetSmallArray = new DataSetSmallMigration[blocksCount];
     size_t doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
+    std::vector<real> doubleValuesArray(blocksCount * doubleCountInBlock); // real-values in all blocks
 
     // define MPI_types depending on the block-specific information
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
@@ -1265,7 +1265,7 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
 
     size_t ic = 0;
     MPI_Offset read_offset;
-    size_t sizeofOneDataSet = size_t(sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double));
+    size_t sizeofOneDataSet = size_t(sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(real));
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -1293,9 +1293,9 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
     //----------------------------- restore data ---------------------------------
     size_t index = 0;
     size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
+    std::vector<real> vectorsOfValues;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___4DArray;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___3DArray;
 
     for (std::size_t n = 0; n < blocksCount; n++)
     {
@@ -1308,47 +1308,47 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
         switch (arrType) 
         {
             case AverageDensity:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
                 break;
             case AverageVelocity:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
                 break;
             case AverageFluktuations:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
                 break;
             case AverageTriple:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
                 break;
             case ShearStressVal:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
                 break;
             case RelaxationFactor:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
                 break;
             case PhaseField1:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setPhaseField(___3DArray);
                 break;
             case PhaseField2:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
                 break;
             case PressureField:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setPressureField(___3DArray);
                 break;
@@ -1380,8 +1380,8 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // kept double: MPI_Wtime() returns double
+    double finish {0.}; // kept double: if real is float the time stamp would be truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
index b822b783edd3628f947aadf20d6dc6109e9e3c31..4b8c6231176923618b1a394f125ff32b0e450d54 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
@@ -34,7 +34,7 @@ public:
     MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIOMigrationCoProcessor() override;
     //! Each timestep writes the grid into the files
-    void process(double step) override;
+    void process(real step) override;
     //! Reads the grid from the files before grid reconstruction
     void restart(int step);
     //! Writes the blocks of the grid into the file cpBlocks.bin
@@ -71,7 +71,7 @@ public:
     void setBCProcessor(SPtr<BCProcessor> bcProcessor);
     //! The function truncates the data files
     void clearAllFiles(int step);
-    // void setNu(double nu);
+    // void setNu(real nu);
 
 private:
     // MPI_Datatype gridParamType, block3dType;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
index ea45bb14110a071724f816b3c7840ce0dfbd7327..f55d2e08a847e0892810ea12da8d97454c1416f7 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
@@ -77,7 +77,7 @@ MPIIORestartCoProcessor::~MPIIORestartCoProcessor()
 }
 
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::process(double step)
+void MPIIORestartCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) 
     {
@@ -138,9 +138,9 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
-    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
-    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
-    std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
+    std::vector<real> doubleValuesArrayF; // real-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<real> doubleValuesArrayH1; // real-values (arrays of f's) in all blocks  H1distribution
+    std::vector<real> doubleValuesArrayH2; // real-values (arrays of f's) in all blocks  H2distribution
 
     if (comm->isRoot()) 
     {
@@ -156,9 +156,9 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     int ic                 = 0;
 
     SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF, D3Q27EsoTwist3DSplittedVectorPtrH1, D3Q27EsoTwist3DSplittedVectorPtrH2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH1, localDistributionsH2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH1, nonLocalDistributionsH2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH1, zeroDistributionsH2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH1, localDistributionsH2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH1, nonLocalDistributionsH2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH1, zeroDistributionsH2;
  
     SPtr<LBMKernel> kernel;
 
@@ -238,55 +238,55 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
                      dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
                      dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = kernel->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> averageDensityArray = kernel->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = kernel->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = kernel->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = kernel->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageFluctArray3DPtr = kernel->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = kernel->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageTripleArray3DPtr = kernel->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = kernel->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> ShearStressValArray3DPtr = kernel->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = kernel->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> relaxationFactor3DPtr = kernel->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = kernel->getDataSet()->getPhaseField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr1 = kernel->getDataSet()->getPhaseField();
                 if (phaseField3DPtr1)
                     arrPresence.isPhaseField1Present = true;
                 else
                     arrPresence.isPhaseField1Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = kernel->getDataSet()->getPhaseField2();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr2 = kernel->getDataSet()->getPhaseField2();
                 if (phaseField3DPtr2)
                     arrPresence.isPhaseField2Present = true;
                 else
                     arrPresence.isPhaseField2Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
                 if (pressureFieldPtr)
                     arrPresence.isPressureFieldPresent = true;
                 else
@@ -345,21 +345,21 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     {
         if (rank == 0) 
         {
-            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         } 
         else 
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // kept double: MPI_Wtime() returns double
+    double finish {0.}; // kept double: if real is float the time stamp would be truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -472,33 +472,6 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     if (arrPresence.isPressureFieldPresent)
         write3DArray(step, PressureField, std::string("/cpPressureField.bin"));
 
-    /*if (arrPresence.isAverageDensityArrayPresent)
-        writeAverageDensityArray(step);
-
-    if (arrPresence.isAverageVelocityArrayPresent)
-        writeAverageVelocityArray(step);
-
-    if (arrPresence.isAverageFluktuationsArrayPresent)
-        writeAverageFluktuationsArray(step);
-
-    if (arrPresence.isAverageTripleArrayPresent)
-        writeAverageTripleArray(step);
-
-    if (arrPresence.isShearStressValArrayPresent)
-        writeShearStressValArray(step);
-
-    if (arrPresence.isRelaxationFactorPresent)
-        writeRelaxationFactor(step);
-
-    if (arrPresence.isPhaseField1Present)
-        writePhaseField(step, 1);
-
-    if (arrPresence.isPhaseField2Present)
-        writePhaseField(step, 2);
-
-    if (arrPresence.isPressureFieldPresent)
-        writePressureField(step);*/
-
 }
 
 void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::string fname)
@@ -519,7 +492,7 @@ void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::stri
     }
 
     DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the AverageDensityArray in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot())
@@ -531,7 +504,7 @@ void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::stri
     bool firstBlock = true;
     int doubleCountInBlock = 0;
     int ic = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++)
     {
@@ -603,20 +576,20 @@ void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::stri
     {
         if (rank == 0)
         {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         }
         else
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    double start{ 0. };
-    double finish{ 0. };
+    double start{ 0. };  // kept double: MPI_Wtime() returns double
+    double finish{ 0. }; // kept double: if real is float the time stamp would be truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -676,7 +649,7 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     }
 
     DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+    std::vector<real> doubleValuesArray; // real-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot())
@@ -688,7 +661,7 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     bool firstBlock = true;
     size_t doubleCountInBlock = 0;
     int ic = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++)
     {
@@ -757,21 +730,21 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     {
         if (rank == 0)
         {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
     }
         else
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
 }
 
 
-    double start{ 0. };
-    double finish{ 0. };
+    double start{ 0. };  // kept double: MPI_Wtime() returns double
+    double finish{ 0. }; // kept double: if real is float the time stamp would be truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -815,13 +788,22 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     delete[] dataSetSmallArray;
 }
 
-/*void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
+void MPIIORestartCoProcessor::writeBoundaryConds(int step)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
+    if (comm->isRoot()) 
+    {
+        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start collect data rank = " << rank);
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+    }
+
+    int blocksCount          = 0; // quantity of blocks in the grid, max 2147483648 blocks!
+    size_t count_boundCond   = 0; // how many BoundaryConditions in all blocks
+    int count_indexContainer = 0; // how many indexContainer-values in all blocks
+    size_t byteCount         = 0; // how many bytes writes this process in the file
 
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
@@ -832,87 +814,128 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
 
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray;
+    BCAddRestart *bcAddArray = new BCAddRestart[blocksCount];
+    std::vector<BoundaryCondition> bcVector;
+    std::vector<int> bcindexmatrixV;
+    std::vector<int> indexContainerV;
+    bool bcindexmatrixCountNotInit = true;
+    int ic = 0;
+    SPtr<BCArray3D> bcArr;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
+        for (SPtr<Block3D> block : blocksVector[level]) // all the blocks of the current level
         {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
+            bcArr = block->getKernel()->getBCProcessor()->getBCArray();
 
-            averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
+            bcAddArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            bcAddArray[ic].x2                   = block->getX2();
+            bcAddArray[ic].x3                   = block->getX3();
+            bcAddArray[ic].level                = block->getLevel();
+            bcAddArray[ic].boundCond_count      = 0; // how many BoundaryConditions in this block
+            bcAddArray[ic].indexContainer_count = 0; // how many indexContainer-values in this block
 
-            if (firstBlock) // when first (any) valid block...
+            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++)
             {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(averageDensityArray->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(averageDensityArray->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(averageDensityArray->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(averageDensityArray->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                BoundaryCondition bouCond{}; // automatic storage: bcVector stores a copy, so nothing is left to free
+                if (bcArr->bcvector[bc] == NULL)
+                {
+                    memset(&bouCond, 0, sizeof(BoundaryCondition)); // empty slot -> all-zero record
+                }
+                else
+                {
+                    bouCond.noslipBoundaryFlags    = bcArr->bcvector[bc]->getNoSlipBoundary();
+                    bouCond.slipBoundaryFlags      = bcArr->bcvector[bc]->getSlipBoundary();
+                    bouCond.velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
+                    bouCond.densityBoundaryFlags   = bcArr->bcvector[bc]->getDensityBoundary();
+                    bouCond.wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
+                    bouCond.bcVelocityX1           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX1();
+                    bouCond.bcVelocityX2           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX2();
+                    bouCond.bcVelocityX3           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX3();
+                    bouCond.bcDensity              = (real)bcArr->bcvector[bc]->getBoundaryDensity();
+                    bouCond.bcPhaseField           = (real)bcArr->bcvector[bc]->getBoundaryPhaseField();
+                    bouCond.nx1                    = (real)bcArr->bcvector[bc]->nx1;
+                    bouCond.nx2                    = (real)bcArr->bcvector[bc]->nx2;
+                    bouCond.nx3                    = (real)bcArr->bcvector[bc]->nx3;
+                    for (int iq = 0; iq < 26; iq++)
+                        bouCond.q[iq] = (real)bcArr->bcvector[bc]->getQ(iq);
+                    bouCond.algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
+                }
 
-                firstBlock = false;
+                bcVector.push_back(bouCond);
+                bcAddArray[ic].boundCond_count++;
+                count_boundCond++;
+            }
+
+            // the quantity of elements in the bcindexmatrix array (CbArray3D<int, IndexerX3X2X1>) in bcArray(BCArray3D)
+            // is always equal, this will be the size of the "write-read-block" in MPI_write_.../MPI_read-functions when
+            // writing/reading BoundConds
+            if (bcindexmatrixCountNotInit) 
+            {
+                boundCondParamStr.nx1                = static_cast<int>(bcArr->bcindexmatrix.getNX1());
+                boundCondParamStr.nx2                = static_cast<int>(bcArr->bcindexmatrix.getNX2());
+                boundCondParamStr.nx3                = static_cast<int>(bcArr->bcindexmatrix.getNX3());
+                boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
+                bcindexmatrixCountNotInit            = false;
             }
+            bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), averageDensityArray->getDataVector().begin(), averageDensityArray->getDataVector().end());
+            indexContainerV.insert(indexContainerV.end(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
+            bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
+            count_indexContainer += bcAddArray[ic].indexContainer_count;
 
             ic++;
         }
     }
 
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
+    MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
+    MPI_Type_commit(&bcindexmatrixType);
 
-    if (comm->isRoot()) 
+    // how many "big blocks" of BLOCK_SIZE size can be formed
+    int bcBlockCount = (int)(count_boundCond / BLOCK_SIZE);
+    if (bcBlockCount * BLOCK_SIZE < (int)count_boundCond)
+        bcBlockCount += 1;
+    for (int i = (int)count_boundCond; i < bcBlockCount * BLOCK_SIZE; i++) // pad bcVector up to a whole number of BLOCK_SIZE records
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        BoundaryCondition bouCond{}; // automatic storage: bcVector stores a copy, so nothing is left to free
+        memset(&bouCond, 0, sizeof(BoundaryCondition));
+        bcVector.push_back(bouCond);
     }
 
+    byteCount = bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) + blocksCount * sizeof(BCAddRestart) +
+                sizeof(int) * (blocksCount * boundCondParamStr.bcindexmatrixCount + count_indexContainer);
+
     // write to the file
     // all processes calculate their offsets (quantity of bytes that the process is going to write)
     // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
+    MPI_Offset write_offset  = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
     size_t next_write_offset = 0;
 
     if (size > 1) 
     {
         if (rank == 0) 
         {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + byteCount;
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         } 
         else 
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + byteCount;
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
+    if (comm->isRoot()) 
+    {
+        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start MPI IO rank = " << rank);
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+    }
+
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // kept double: MPI_Wtime() returns double
+    double finish {0.}; // kept double: if real is float the time stamp would be truncated
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -925,2388 +948,329 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
 #endif
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageDensityArray.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
+    MPI_Offset write_offset1 = (MPI_Offset)(rank * (3 * sizeof(int) + sizeof(boundCondParam)));
+
     // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, write_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+    // each process writes the quantity of "big blocks" of BLOCK_SIZE of boundary conditions
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + sizeof(int)), &bcBlockCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+    // each process writes the quantity of indexContainer elements in all blocks
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 2 * sizeof(int)), &count_indexContainer, 1, MPI_INT,  MPI_STATUS_IGNORE);
+    // each process writes the quantity of bcindexmatrix elements in every block
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
+
+    // each process writes data identifying the blocks
+    MPI_File_write_at(file_handler, write_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
+    // each process writes boundary conditions
+    if (bcVector.size() > 0)
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart)), &bcVector[0],
+                          bcBlockCount, boundCondType1000, MPI_STATUS_IGNORE);
+    // each process writes bcindexmatrix values
+    if (bcindexmatrixV.size() > 0)
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition)),
+                          &bcindexmatrixV[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
+    // each process writes indexContainer values
+    if (indexContainerV.size() > 0)
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) +
+                      blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)), &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
+    MPI_Type_free(&bcindexmatrixType);
 
     if (comm->isRoot()) 
     {
         finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray time: " << finish - start << " s");
+        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds time: " << finish - start << " s");
     }
 
-    delete[] dataSetSmallArray;
+    delete[] bcAddArray;
 }
 
-void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
+//------------------------------------------- READ -----------------------------------------------
+void MPIIORestartCoProcessor::restart(int step) // loads the check point of time step `step`: blocks, data set, boundary conditions
 {
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (comm->isRoot()) // progress messages are emitted only when comm reports this process as root
+        UBLOG(logINFO, "MPIIORestartCoProcessor restart step: " << step);
+    if (comm->isRoot())
+        UBLOG(logINFO, "Load check point - start");
 
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
+    readBlocks(step); // forwards to MPIIOCoProcessor::readBlocks
+    readDataSet(step); // reads <path>/mpi_io_cp/mpi_io_cp_<step>/cpDataSetF.bin; NOTE(review): presumably relies on readBlocks having run first - confirm
+    readBoundaryConds(step); // NOTE(review): presumably the read counterpart of writeBoundaryConds (cpBC.bin) - confirm
 
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
+    grid->setTimeStep(step); // hand the restored step number to the grid once all three reads have returned
 
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
+    if (comm->isRoot())
+        UBLOG(logINFO, "Load check point - end");
+}
+
+void MPIIORestartCoProcessor::readBlocks(int step) { MPIIOCoProcessor::readBlocks(step); } // thin forwarder: passes `step` unchanged to MPIIOCoProcessor::readBlocks
+
+void MPIIORestartCoProcessor::readDataSet(int step)
+{
+    int rank, size;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
 
     if (comm->isRoot()) 
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray start collect data rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start MPI IO rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
+    
+    real start {0.};
+    real finish {0.};
+    if (comm->isRoot())
+        start = MPI_Wtime();
 
-            AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+    MPI_File file_handler;
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
+    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(AverageVelocityArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(AverageVelocityArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(AverageVelocityArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(AverageVelocityArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    // calculate the read offset
+    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
+    size_t next_read_offset = 0;
+    bool multiPhase1 = false;
+    bool multiPhase2 = false;
 
-                firstBlock = false;
-            }
+    // read count of blocks
+    int blocksCount = 0;
+    dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageVelocityArray3DPtr->getDataVector().begin(), AverageVelocityArray3DPtr->getDataVector().end());
+    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, read_offset, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
-            ic++;
-        }
-    }
+    DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
+    real doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
+        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+    std::vector<real> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // real-values in all blocks  Fdistributions
+    std::vector<real> doubleValuesArrayH1; // real-values in all blocks  H1distributions
+    std::vector<real> doubleValuesArrayH2; // real-values in all blocks  H2distributions
 
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
+    //   define MPI_types depending on the block-specific information
+    MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageVelocityArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-
-    MPI_Type_free(&dataSetDoubleType);
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(AverageFluctArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(AverageFluctArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(AverageFluctArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(AverageFluctArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&(dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageFluctArray3DPtr->getDataVector().begin(),
-                                         AverageFluctArray3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageFluktuationsArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(AverageTripleArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(AverageTripleArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(AverageTripleArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(AverageTripleArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageTripleArray3DPtr->getDataVector().begin(), AverageTripleArray3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageTripleArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeShearStressValArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(ShearStressValArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(ShearStressValArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(ShearStressValArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(ShearStressValArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), ShearStressValArray3DPtr->getDataVector().begin(),
-                                         ShearStressValArray3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpShearStressValArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> RelaxationFactor3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            RelaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(RelaxationFactor3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(RelaxationFactor3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(RelaxationFactor3DPtr->getNX3());
-                dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), RelaxationFactor3DPtr->getDataVector().begin(),
-                                         RelaxationFactor3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpRelaxationFactor.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of its blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writePhaseField(int step, int fieldN)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> PhaseField3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            if(fieldN == 1)
-                PhaseField3DPtr = block->getKernel()->getDataSet()->getPhaseField();
-            else
-                PhaseField3DPtr = block->getKernel()->getDataSet()->getPhaseField2();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(PhaseField3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(PhaseField3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(PhaseField3DPtr->getNX3());
-                dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-                std::cout << "writePhaseField"<<fieldN<< " = " << dataSetParamStr.nx[0] << " " << dataSetParamStr.nx[1] << " " << dataSetParamStr.nx[2] << std::endl;
-                firstBlock = false;
-            }
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), PhaseField3DPtr->getDataVector().begin(), PhaseField3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-        
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename;
-    if(fieldN == 1) filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField1.bin";
-    else filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField2.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of its blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writePressureField(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++)
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePressureField start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock = true;
-    int doubleCountInBlock = 0;
-    int ic = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> PressureField3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++)
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            PressureField3DPtr = block->getKernel()->getDataSet()->getPressureField();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(PressureField3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(PressureField3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(PressureField3DPtr->getNX3());
-                dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), PressureField3DPtr->getDataVector().begin(),
-                    PressureField3DPtr->getDataVector().end());
- 
-            ic++;
-        }
-    }
-    //doubleValuesArrayRW.assign(doubleValuesArray.begin(), doubleValuesArray.end());
-    //std::cout << "doubleValuesArrayRW = " << doubleValuesArrayRW.size() << std::endl;
-   // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePressureField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1)
-    {
-        if (rank == 0)
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        }
-        else
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-
-    double start{ 0. };
-    double finish{ 0. };
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPressureField.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of its blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-        dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-            &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot())
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePressureField time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}*/
-
-// Writes the boundary conditions of every block owned by this rank into
-// <path>/mpi_io_cp/mpi_io_cp_<step>/cpBC.bin via MPI-IO.
-//
-// File layout produced here:
-//   * header area: one record per rank at offset rank * (3 * sizeof(int) + sizeof(boundCondParam))
-//     holding blocksCount, bcBlockCount, count_indexContainer and boundCondParamStr;
-//   * data area (starts behind the header records of all ranks): per rank, back to back,
-//     the BCAddRestart entries, the BoundaryCondition records padded with zeroed records
-//     to a multiple of BLOCK_SIZE, the bcindexmatrix values and finally the indexContainer values.
-// Every rank > 0 receives the start of its data area from rank - 1 and forwards the end
-// of its own area to rank + 1.
-//
-// step: time step whose checkpoint directory receives the file.
-// Throws UbException if the file cannot be opened.
-void MPIIORestartCoProcessor::writeBoundaryConds(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    int blocksCount          = 0; // quantity of blocks of this rank; stored as int, so at most 2147483647 blocks
-    size_t count_boundCond   = 0; // how many BoundaryConditions in all blocks
-    int count_indexContainer = 0; // how many indexContainer-values in all blocks
-    size_t byteCount         = 0; // how many bytes this process writes into the data area of the file
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++)
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    // owned by a vector so that it is released on every exit path, including the throw below
-    std::vector<BCAddRestart> bcAddArray(blocksCount);
-    std::vector<BoundaryCondition> bcVector;
-    std::vector<int> bcindexmatrixV;
-    std::vector<int> indexContainerV;
-    bool bcindexmatrixCountNotInit = true;
-    int ic = 0;
-    SPtr<BCArray3D> bcArr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++)
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) // all the blocks of the current level
-        {
-            bcArr = block->getKernel()->getBCProcessor()->getBCArray();
-
-            bcAddArray[ic].x1                   = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            bcAddArray[ic].x2                   = block->getX2();
-            bcAddArray[ic].x3                   = block->getX3();
-            bcAddArray[ic].level                = block->getLevel();
-            bcAddArray[ic].boundCond_count      = 0; // how many BoundaryConditions in this block
-            bcAddArray[ic].indexContainer_count = 0; // how many indexContainer-values in this block
-
-            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++)
-            {
-                // stack object instead of new: the record is copied into bcVector anyway,
-                // so a heap allocation per boundary condition was leaked on every call
-                BoundaryCondition bouCond = BoundaryCondition();
-                const auto &bcSource      = bcArr->bcvector[bc];
-                if (bcSource == NULL)
-                {
-                    // empty slot: written as an all-zero record
-                    memset(&bouCond, 0, sizeof(BoundaryCondition));
-                }
-                else
-                {
-                    bouCond.noslipBoundaryFlags    = bcSource->getNoSlipBoundary();
-                    bouCond.slipBoundaryFlags      = bcSource->getSlipBoundary();
-                    bouCond.velocityBoundaryFlags  = bcSource->getVelocityBoundary();
-                    bouCond.densityBoundaryFlags   = bcSource->getDensityBoundary();
-                    bouCond.wallModelBoundaryFlags = bcSource->getWallModelBoundary();
-                    bouCond.bcVelocityX1           = (float)bcSource->getBoundaryVelocityX1();
-                    bouCond.bcVelocityX2           = (float)bcSource->getBoundaryVelocityX2();
-                    bouCond.bcVelocityX3           = (float)bcSource->getBoundaryVelocityX3();
-                    bouCond.bcDensity              = (float)bcSource->getBoundaryDensity();
-                    bouCond.bcPhaseField           = (float)bcSource->getBoundaryPhaseField();
-                    bouCond.nx1                    = (float)bcSource->nx1;
-                    bouCond.nx2                    = (float)bcSource->nx2;
-                    bouCond.nx3                    = (float)bcSource->nx3;
-                    for (int iq = 0; iq < 26; iq++)
-                        bouCond.q[iq] = (float)bcSource->getQ(iq);
-                    bouCond.algorithmType = bcSource->getBcAlgorithmType();
-                }
-
-                bcVector.push_back(bouCond);
-                bcAddArray[ic].boundCond_count++;
-                count_boundCond++;
-            }
-
-            // the quantity of elements in the bcindexmatrix array (CbArray3D<int, IndexerX3X2X1>) in bcArray(BCArray3D)
-            // is always equal, this will be the size of the "write-read-block" in MPI_write_.../MPI_read-functions when
-            // writing/reading BoundConds
-            if (bcindexmatrixCountNotInit)
-            {
-                boundCondParamStr.nx1                = static_cast<int>(bcArr->bcindexmatrix.getNX1());
-                boundCondParamStr.nx2                = static_cast<int>(bcArr->bcindexmatrix.getNX2());
-                boundCondParamStr.nx3                = static_cast<int>(bcArr->bcindexmatrix.getNX3());
-                boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
-                bcindexmatrixCountNotInit            = false;
-            }
-            bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
-
-            indexContainerV.insert(indexContainerV.end(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
-            bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
-            count_indexContainer += bcAddArray[ic].indexContainer_count;
-
-            ic++;
-        }
-    }
-
-    // NOTE(review): if this rank owns no blocks, boundCondParamStr is not updated above and
-    // keeps whatever it held before this call - confirm that this is intended.
-    MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
-    MPI_Type_commit(&bcindexmatrixType);
-
-    // how many "big blocks" of BLOCK_SIZE size can be formed (rounded up)
-    int bcBlockCount = (int)(count_boundCond / BLOCK_SIZE);
-    if (bcBlockCount * BLOCK_SIZE < (int)count_boundCond)
-        bcBlockCount += 1;
-
-    // fill the last "big block" with zeroed records; one stack object is reused
-    // instead of leaking one heap allocation per padding entry
-    BoundaryCondition paddingBC;
-    memset(&paddingBC, 0, sizeof(BoundaryCondition));
-    for (int i = (int)count_boundCond; i < bcBlockCount * BLOCK_SIZE; i++)
-        bcVector.push_back(paddingBC);
-
-    byteCount = bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) + blocksCount * sizeof(BCAddRestart) +
-                sizeof(int) * (blocksCount * boundCondParamStr.bcindexmatrixCount + count_indexContainer);
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    // NOTE(review): next_write_offset is a size_t but is sent as MPI_LONG_LONG_INT and received
-    // into an MPI_Offset; this relies on all three being the same width - confirm for the target platforms.
-    MPI_Offset write_offset  = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
-    size_t next_write_offset = 0;
-
-    if (size > 1)
-    {
-        if (rank == 0)
-        {
-            next_write_offset = write_offset + byteCount;
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        }
-        else
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + byteCount;
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-
-#ifdef HLRN_LUSTRE
-    // the info object is only needed for the open call; release it on both the success and the error path
-    MPI_Info_free(&info);
-#endif
-
-    if (rc != MPI_SUCCESS)
-    {
-        // do not leave the committed datatype behind when bailing out
-        MPI_Type_free(&bcindexmatrixType);
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-    }
-
-    // position of this rank's record in the header area
-    MPI_Offset write_offset1 = (MPI_Offset)(rank * (3 * sizeof(int) + sizeof(boundCondParam)));
-
-    // each process writes the quantity of its blocks
-    MPI_File_write_at(file_handler, write_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes the quantity of "big blocks" of BLOCK_SIZE of boundary conditions
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + sizeof(int)), &bcBlockCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes the quantity of indexContainer elements in all blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 2 * sizeof(int)), &count_indexContainer, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes the quantity of bcindexmatrix elements in every block
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
-
-    // each process writes data identifying the blocks
-    MPI_File_write_at(file_handler, write_offset, bcAddArray.data(), blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
-    // each process writes boundary conditions
-    if (bcVector.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart)), &bcVector[0],
-                          bcBlockCount, boundCondType1000, MPI_STATUS_IGNORE);
-    // each process writes bcindexmatrix values
-    if (bcindexmatrixV.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition)),
-                          &bcindexmatrixV[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
-    // each process writes indexContainer values
-    if (indexContainerV.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) +
-                      blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)), &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&bcindexmatrixType);
-
-    if (comm->isRoot())
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds time: " << finish - start << " s");
-    }
-}
-
-//------------------------------------------- READ -----------------------------------------------
-// Loads the checkpoint belonging to time step `step`: reads the blocks, the data sets
-// and the boundary conditions (in this order) and afterwards sets the time step of the
-// grid to `step`. The progress messages are logged only where comm->isRoot() is true.
-void MPIIORestartCoProcessor::restart(int step)
-{
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor restart step: " << step);
-        UBLOG(logINFO, "Load check point - start");
-    }
-
-    // the three parts of the checkpoint are read one after another
-    this->readBlocks(step);
-    this->readDataSet(step);
-    this->readBoundaryConds(step);
-
-    // continue the simulation from the restored step
-    this->grid->setTimeStep(step);
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "Load check point - end");
-    }
-}
-
-// Reads the blocks of the checkpoint for time step `step` by forwarding
-// the call unchanged to MPIIOCoProcessor::readBlocks.
-void MPIIORestartCoProcessor::readBlocks(int step)
-{
-    MPIIOCoProcessor::readBlocks(step);
-}
-
-void MPIIORestartCoProcessor::readDataSet(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-    bool multiPhase1 = false;
-    bool multiPhase2 = false;
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, read_offset, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
-    double doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
-        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-    std::vector<double> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
-    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
-    std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
-
-    //   define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
-                     &doubleValuesArrayF[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-
- //-------------------------------------- H1 -----------------------------
-    MPI_Offset fsize;
-    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
-    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-    MPI_File_get_size(file_handler, &fsize);
-    if (fsize > 0)
-    {
-        multiPhase1 = true;
-        doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
-        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    }
-    MPI_File_close(&file_handler);
-
-    //-------------------------------------- H2 -----------------------------
-    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
-    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    MPI_File_get_size(file_handler, &fsize);
-    if (fsize > 0)
-    {
-        multiPhase2 = true;
-        doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
-        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    }
-    MPI_File_close(&file_handler);
-    //-------------------------------------------------------------------
-
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    size_t index = 0;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
-    std::vector<double> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
-    size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
-    size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
-    size_t vectorSize3 = dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValuesF1.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize1);
-        if (multiPhase1)
-            vectorsOfValuesH11.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize1);
-        if (multiPhase2)
-            vectorsOfValuesH21.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize1);
-        index += vectorSize1;
-
-        vectorsOfValuesF2.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize2);
-        if (multiPhase1)
-            vectorsOfValuesH12.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize2);
-        if (multiPhase2)
-            vectorsOfValuesH22.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize2);
-        index += vectorSize2;
-
-        vectorsOfValuesF3.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize3);
-        if (multiPhase1)
-            vectorsOfValuesH13.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize3);
-        if (multiPhase2)
-            vectorsOfValuesH23.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize3);
-        index += vectorSize3;
-
-        SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                    vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr1.nx2);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr1.nx3);
-
-        SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
-        if (multiPhase1)
-        {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX2(dataSetParamStr1.nx2);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX3(dataSetParamStr1.nx3);
-        }
-
-        SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
-        if (multiPhase2)
-        {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                    vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX2(dataSetParamStr1.nx2);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX3(dataSetParamStr1.nx3);
-        }
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
-   
-        this->lbmKernel->setBlock(block);
-        this->lbmKernel->setNX(std::array<int, 3>{{dataSetParamStr1.nx1, dataSetParamStr1.nx2, dataSetParamStr1.nx3}});
-        SPtr<LBMKernel> kernel = this->lbmKernel->clone();
-        kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
-        kernel->setCollisionFactor(dataSetArray[n].collFactor);
-        kernel->setDeltaT(dataSetArray[n].deltaT);
-        kernel->setCompressible(dataSetArray[n].compressible);
-        kernel->setWithForcing(dataSetArray[n].withForcing);
-        kernel->setCollisionFactorMultiphase(dataSetArray[n].collFactorL, dataSetArray[n].collFactorG);
-        kernel->setDensityRatio(dataSetArray[n].densityRatio);
-
-        SPtr<DataSet3D> dataSetPtr = SPtr<DataSet3D>(new DataSet3D());
-        dataSetPtr->setFdistributions(mFdistributions);
-        if (multiPhase1)
-            dataSetPtr->setHdistributions(mH1distributions);
-        if (multiPhase2)
-            dataSetPtr->setH2distributions(mH2distributions);
-        kernel->setDataSet(dataSetPtr);
-        block->setKernel(kernel);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetArray;
-
-    //-------------------------------------------------------------
-
-    DSArraysPresence arrPresence;
-    MPI_File file_handler1;
-    std::string filename1 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpArrays.bin";
-    rc = MPI_File_open(MPI_COMM_WORLD, filename1.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler1);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename1);
-    MPI_File_read_at(file_handler1, (MPI_Offset)0, &arrPresence, 1, arrayPresenceType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler1);
-
-    /*if (arrPresence.isAverageDensityArrayPresent)
-        readAverageDensityArray(step);
-
-    if (arrPresence.isAverageVelocityArrayPresent)
-        readAverageVelocityArray(step);
-
-    if (arrPresence.isAverageFluktuationsArrayPresent)
-        readAverageFluktuationsArray(step);
-
-    if (arrPresence.isAverageTripleArrayPresent)
-        readAverageTripleArray(step);
-
-    if (arrPresence.isShearStressValArrayPresent)
-        readShearStressValArray(step);
-
-    if (arrPresence.isRelaxationFactorPresent)
-        readRelaxationFactor(step);
-
-    if (arrPresence.isPhaseField1Present)
-        readPhaseField(step, 1);
-
-    if (arrPresence.isPhaseField2Present)
-        readPhaseField(step, 2);
-
-    if (arrPresence.isPressureFieldPresent)
-        readPressureField(step);*/
-
-    if (arrPresence.isAverageDensityArrayPresent)
-        readArray(step, AverageDensity, std::string("/cpAverageDensityArray.bin"));
-
-    if (arrPresence.isAverageVelocityArrayPresent)
-        readArray(step, AverageVelocity, std::string("/cpAverageVelocityArray.bin"));
-
-    if (arrPresence.isAverageFluktuationsArrayPresent)
-        readArray(step, AverageFluktuations, std::string("/cpAverageFluktuationsArray.bin"));
-
-    if (arrPresence.isAverageTripleArrayPresent)
-        readArray(step, AverageTriple, std::string("/cpAverageTripleArray.bin"));
-
-    if (arrPresence.isShearStressValArrayPresent)
-        readArray(step, ShearStressVal, std::string("/cpShearStressValArray.bin"));
-
-    if (arrPresence.isRelaxationFactorPresent)
-        readArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
-
-    if (arrPresence.isPhaseField1Present)
-        readArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
-
-    if (arrPresence.isPhaseField2Present)
-        readArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
-
-    if (arrPresence.isPressureFieldPresent)
-        readArray(step, PressureField, std::string("/cpPressureField.bin"));
-
-}
-
-void MPIIORestartCoProcessor::readArray(int step, Arrays arrType, std::string fname)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start fname = " << fname);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    double start{ 0. };
-    double finish{ 0. };
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + fname;
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    size_t blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    memset(&dataSetParamStr, 0, sizeof(dataSetParam));
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1)
-    {
-        if (rank == 0)
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        }
-        else
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, (int)blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(
-            file_handler,
-            (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-            &doubleValuesArray[0], (int)blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot())
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    //----------------------------- restore data ---------------------------------
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (std::size_t n = 0; n < blocksCount; n++)
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-
-       // fill arrays
-       switch (arrType)
-        {
-        case AverageDensity:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
-            break;
-        case AverageVelocity:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
-            break;
-        case AverageFluktuations:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
-            break;
-        case AverageTriple:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
-            break;
-        case ShearStressVal:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
-            break;
-        case RelaxationFactor:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
-            break;
-        case PhaseField1:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setPhaseField(___3DArray);
-            break;
-        case PhaseField2:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
-            break;
-        case PressureField:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setPressureField(___3DArray);
-            break;
-        default:
-            UB_THROW(UbException(UB_EXARGS, "MPIIORestartCoProcessor::readArray : array type does not exist!"));
-            break;
-        }
-    }
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-/*void MPIIORestartCoProcessor::readAverageDensityArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageDensityArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    memset(&dataSetParamStr, 0, sizeof(dataSetParam));
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill mAverageDensity arrays
-        SPtr<AverageValuesArray3D> mAverageDensity;
-        mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-            dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageDensity(mAverageDensity);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readAverageVelocityArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageVelocityArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill mAverageVelocity array
-        SPtr<AverageValuesArray3D> mAverageVelocity;
-        mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], 
-            dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageVelocity(mAverageVelocity);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readAverageFluktuationsArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename =
-        path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageFluktuationsArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill AverageFluktuations array
-        SPtr<AverageValuesArray3D> mAverageFluktuations;
-        mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageFluctuations(mAverageFluktuations);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readAverageTripleArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageTripleArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill AverageTriplecorrelations array
-        SPtr<AverageValuesArray3D> mAverageTriplecorrelations;
-        mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageTriplecorrelations(mAverageTriplecorrelations);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readShearStressValArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpShearStressValArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
+    if (size > 1) 
     {
         if (rank == 0) 
         {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(real));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         } 
         else 
         {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
+                     &doubleValuesArrayF[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill ShearStressValuesArray array
-        SPtr<ShearStressValuesArray3D> mShearStressValues;
-        mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
 
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setShearStressValues(mShearStressValues);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readRelaxationFactor(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpRelaxationFactor.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+ //-------------------------------------- H1 -----------------------------
+    MPI_Offset fsize;
+    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
+    MPI_File_get_size(file_handler, &fsize);
+    if (fsize > 0)
     {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
+        multiPhase1 = true;
+        doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
+        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     }
+    MPI_File_close(&file_handler);
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    //-------------------------------------- H2 -----------------------------
+    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+    MPI_File_get_size(file_handler, &fsize);
+    if (fsize > 0)
+    {
+        multiPhase2 = true;
+        doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
+        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    }
     MPI_File_close(&file_handler);
+    //-------------------------------------------------------------------
+
     MPI_Type_free(&dataSetDoubleType);
 
     if (comm->isRoot()) 
     {
         finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor start of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet time: " << finish - start << " s");
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
-
+    
     size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<real> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<real> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    std::vector<real> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+    size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
+    size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
+    size_t vectorSize3 = dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+
     for (int n = 0; n < blocksCount; n++) 
     {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill RelaxationFactor array
-        SPtr<RelaxationFactorArray3D> mRelaxationFactor;
-        mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setRelaxationFactor(mRelaxationFactor);
-    }
+        vectorsOfValuesF1.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize1);
+        if (multiPhase1)
+            vectorsOfValuesH11.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize1);
+        if (multiPhase2)
+            vectorsOfValuesH21.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize1);
+        index += vectorSize1;
 
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
+        vectorsOfValuesF2.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize2);
+        if (multiPhase1)
+            vectorsOfValuesH12.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize2);
+        if (multiPhase2)
+            vectorsOfValuesH22.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize2);
+        index += vectorSize2;
 
-    delete[] dataSetSmallArray;
-}
+        vectorsOfValuesF3.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize3);
+        if (multiPhase1)
+            vectorsOfValuesH13.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize3);
+        if (multiPhase2)
+            vectorsOfValuesH23.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize3);
+        index += vectorSize3;
 
-void MPIIORestartCoProcessor::readPhaseField(int step, int fieldN)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
+        SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                    vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr1.nx2);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr1.nx3);
 
-    MPI_File file_handler;
-    std::string filename;
-    if(fieldN == 1) filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField1.bin";
-    else filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField2.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+        SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
+        if (multiPhase1)
+        {
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX2(dataSetParamStr1.nx2);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX3(dataSetParamStr1.nx3);
+        }
 
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
+        SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
+        if (multiPhase2)
+        {
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                    vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX2(dataSetParamStr1.nx2);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX3(dataSetParamStr1.nx3);
+        }
 
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
+        // find the necessary block and fill it
+        SPtr<Block3D> block = grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
+   
+        this->lbmKernel->setBlock(block);
+        this->lbmKernel->setNX(std::array<int, 3>{{dataSetParamStr1.nx1, dataSetParamStr1.nx2, dataSetParamStr1.nx3}});
+        SPtr<LBMKernel> kernel = this->lbmKernel->clone();
+        kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
+        kernel->setCollisionFactor(dataSetArray[n].collFactor);
+        kernel->setDeltaT(dataSetArray[n].deltaT);
+        kernel->setCompressible(dataSetArray[n].compressible);
+        kernel->setWithForcing(dataSetArray[n].withForcing);
+        kernel->setCollisionFactorMultiphase(dataSetArray[n].collFactorL, dataSetArray[n].collFactorG);
+        kernel->setDensityRatio(dataSetArray[n].densityRatio);
 
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
+        SPtr<DataSet3D> dataSetPtr = SPtr<DataSet3D>(new DataSet3D());
+        dataSetPtr->setFdistributions(mFdistributions);
+        if (multiPhase1)
+            dataSetPtr->setHdistributions(mH1distributions);
+        if (multiPhase2)
+            dataSetPtr->setH2distributions(mH2distributions);
+        kernel->setDataSet(dataSetPtr);
+        block->setKernel(kernel);
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
     if (comm->isRoot()) 
     {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField start of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet end of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    std::cout << "readPhaseField"<< fieldN<<" = " << dataSetParamStr.nx[0] << " " << dataSetParamStr.nx[1] << " " << dataSetParamStr.nx[2] << std::endl;
+    delete[] dataSetArray;
 
-    for (int n = 0; n < blocksCount; n++)
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
+    //-------------------------------------------------------------
 
-        // fill PhaseField array
-        SPtr<PhaseFieldArray3D> mPhaseField;
-        mPhaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+    DSArraysPresence arrPresence;
+    MPI_File file_handler1;
+    std::string filename1 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpArrays.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename1.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler1);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename1);
+    MPI_File_read_at(file_handler1, (MPI_Offset)0, &arrPresence, 1, arrayPresenceType, MPI_STATUS_IGNORE);
+    MPI_File_close(&file_handler1);
 
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        if(fieldN == 1)
-            block->getKernel()->getDataSet()->setPhaseField(mPhaseField);
-        else
-            block->getKernel()->getDataSet()->setPhaseField2(mPhaseField);
-       int nx1 = static_cast<int>(block->getKernel()->getDataSet()->getPhaseField()->getNX1());
-       int nx2 = static_cast<int>(block->getKernel()->getDataSet()->getPhaseField()->getNX2());
-       int nx3 = static_cast<int>(block->getKernel()->getDataSet()->getPhaseField()->getNX3());
-        dataSetParamStr.nx[3] = 1;
-        doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-        std::cout << "writePhaseField" << fieldN << " = " << nx1 << " " << nx2 << " " << nx3 << std::endl;
+    if (arrPresence.isAverageDensityArrayPresent)
+        readArray(step, AverageDensity, std::string("/cpAverageDensityArray.bin"));
 
-    }
+    if (arrPresence.isAverageVelocityArrayPresent)
+        readArray(step, AverageVelocity, std::string("/cpAverageVelocityArray.bin"));
 
-    if (comm->isRoot()) 
-    { 
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
+    if (arrPresence.isAverageFluktuationsArrayPresent)
+        readArray(step, AverageFluktuations, std::string("/cpAverageFluktuationsArray.bin"));
+
+    if (arrPresence.isAverageTripleArrayPresent)
+        readArray(step, AverageTriple, std::string("/cpAverageTripleArray.bin"));
+
+    if (arrPresence.isShearStressValArrayPresent)
+        readArray(step, ShearStressVal, std::string("/cpShearStressValArray.bin"));
+
+    if (arrPresence.isRelaxationFactorPresent)
+        readArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
+
+    if (arrPresence.isPhaseField1Present)
+        readArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
+
+    if (arrPresence.isPhaseField2Present)
+        readArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
+
+    if (arrPresence.isPressureFieldPresent)
+        readArray(step, PressureField, std::string("/cpPressureField.bin"));
 
-    delete[] dataSetSmallArray;
 }
 
-void MPIIORestartCoProcessor::readPressureField(int step)
+void MPIIORestartCoProcessor::readArray(int step, Arrays arrType, std::string fname)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -3314,30 +1278,32 @@ void MPIIORestartCoProcessor::readPressureField(int step)
 
     if (comm->isRoot())
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField start MPI IO rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start fname = " << fname);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    double start{ 0. };
-    double finish{ 0. };
+    real start{ 0. };
+    real finish{ 0. };
     if (comm->isRoot())
         start = MPI_Wtime();
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPressureField.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + fname;
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
     // read count of blocks
-    int blocksCount = 0;
+    size_t blocksCount = 0;
     dataSetParam dataSetParamStr;
+    memset(&dataSetParamStr, 0, sizeof(dataSetParam));
+
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
     MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
     int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
+    std::vector<real> doubleValuesArray(blocksCount * doubleCountInBlock); // real-values in all blocks
 
     // define MPI_types depending on the block-specific information
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
@@ -3351,60 +1317,112 @@ void MPIIORestartCoProcessor::readPressureField(int step)
     {
         if (rank == 0)
         {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         }
         else
         {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, (int)blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-            &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+        MPI_File_read_at(
+            file_handler,
+            (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+            &doubleValuesArray[0], (int)blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
     if (comm->isRoot())
     {
         finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField start of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray time: " << finish - start << " s");
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
+    //----------------------------- restore data ---------------------------------
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___4DArray;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___3DArray;
+
     size_t index = 0;
     size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-
-    for (int n = 0; n < blocksCount; n++)
+    std::vector<real> vectorsOfValues;
+    for (std::size_t n = 0; n < blocksCount; n++)
     {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
-    
-        // fill Pressure array
-        SPtr<PressureFieldArray3D> mPressureField;
-        mPressureField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
 
         // find the nesessary block and fill it
         SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setPressureField(mPressureField);
+
+       // fill arrays
+       switch (arrType)
+        {
+        case AverageDensity:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
+            break;
+        case AverageVelocity:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
+            break;
+        case AverageFluktuations:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
+            break;
+        case AverageTriple:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
+            break;
+        case ShearStressVal:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
+            break;
+        case RelaxationFactor:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
+            break;
+        case PhaseField1:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setPhaseField(___3DArray);
+            break;
+        case PhaseField2:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
+            break;
+        case PressureField:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setPressureField(___3DArray);
+            break;
+        default:
+            UB_THROW(UbException(UB_EXARGS, "MPIIORestartCoProcessor::readArray : array type does not exist!"));
+            break;
+        }
     }
 
     if (comm->isRoot())
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField end of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray end of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
-}*/
+}
 
 void MPIIORestartCoProcessor::readBoundaryConds(int step)
 {
@@ -3418,8 +1436,8 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
-    double finish {0.};
+    real start {0.};
+    real finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
index 1a1e1fb4d45066a93826fe7a819b056e10544036..a4c1b32efbafbdb467eee7facede5f8d834e1a93 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
@@ -35,7 +35,7 @@ public:
     MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIORestartCoProcessor() override;
     //! Each timestep writes the grid into the files
-    void process(double step) override;
+    void process(real step) override;
     //! Reads the grid from the files before grid reconstruction
     void restart(int step);
     //! Writes the blocks of the grid into the file cpBlocks.bin
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
index 53e98e9e107e0cc91fccf6e59afae18ea9a0e931..f1762b5fdcfa8a2d4b20ee95665a2a3329e85196 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
@@ -22,7 +22,7 @@ MicrophoneArrayCoProcessor::MicrophoneArrayCoProcessor(SPtr<Grid3D> grid, SPtr<U
 
 MicrophoneArrayCoProcessor::~MicrophoneArrayCoProcessor() = default;
 
-void MicrophoneArrayCoProcessor::process(double step)
+void MicrophoneArrayCoProcessor::process(real step)
 {
     if (microphones.size() > 0) {
         collectData(step);
@@ -89,20 +89,20 @@ bool MicrophoneArrayCoProcessor::addMicrophone(Vector3D coords)
     return false;
 }
 
-void MicrophoneArrayCoProcessor::collectData(double step)
+void MicrophoneArrayCoProcessor::collectData(real step)
 {
     for (std::size_t i = 0; i < microphones.size(); i++) {
-        LBMReal f[D3Q27System::ENDF + 1];
+        real f[D3Q27System::ENDF + 1];
         microphones[i]->distridution->getDistribution(f, val<1>(microphones[i]->nodeIndexes),
                                                       val<2>(microphones[i]->nodeIndexes),
                                                       val<3>(microphones[i]->nodeIndexes));
-        LBMReal vx1, vx2, vx3, rho;
+        real vx1, vx2, vx3, rho;
         calcMacros(f, rho, vx1, vx2, vx3);
         *strVector[i] << step << ';' << rho << '\n';
     }
 }
 
-void MicrophoneArrayCoProcessor::writeFile(double /*step*/)
+void MicrophoneArrayCoProcessor::writeFile(real /*step*/)
 {
     for (std::size_t i = 0; i < microphones.size(); i++) {
         std::string fname = path + "/mic/mic_" + UbSystem::toString(microphones[i]->id) + ".csv";
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
index a10f30440c8539677511af6f7ac40fbe257d4eaf..140ac5a48405adb96b64941144a13fa6790a9e8c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
@@ -27,14 +27,14 @@ public:
     ~MicrophoneArrayCoProcessor() override;
 
     //! calls collectData.
-    void process(double step) override;
+    void process(real step) override;
 
     //! add microphone
     bool addMicrophone(Vector3D coords);
 
 protected:
-    void collectData(double step);
-    void writeFile(double step);
+    void collectData(real step);
+    void writeFile(real step);
 
 private:
     std::string path;
@@ -52,7 +52,7 @@ private:
     int count;
     int micID;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
index 633ffd26f3ed77c58ac83200fdf18cb6f0385979..af8cf408369454127a1fd246cf19f2e9fecefc96 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
@@ -44,16 +44,16 @@ NUPSCounterCoProcessor::NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbSchedul
     if (comm->getProcessID() == comm->getRoot()) {
         timer.resetAndStart();
 
-        double nop          = comm->getNumberOfProcesses();
+        real nop          = comm->getNumberOfProcesses();
         int minInitLevel    = grid->getCoarsestInitializedLevel();
         int maxInitLevel    = grid->getFinestInitializedLevel();
         UbTupleInt3 blocknx = grid->getBlockNX();
-        double nod          = (double)(val<1>(blocknx)) * (double)(val<2>(blocknx)) * (double)(val<3>(blocknx));
+        real nod          = (real)(val<1>(blocknx)) * (real)(val<2>(blocknx)) * (real)(val<3>(blocknx));
         nup                 = 0;
 
         for (int level = minInitLevel; level <= maxInitLevel; level++) {
             int nob = grid->getNumberOfBlocks(level);
-            nup_t += (double)(1 << level) * nob * nod;
+            nup_t += (real)(1 << level) * nob * nod;
         }
         nup = nup_t / nop;
     }
@@ -61,19 +61,19 @@ NUPSCounterCoProcessor::NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbSchedul
 //////////////////////////////////////////////////////////////////////////
 NUPSCounterCoProcessor::~NUPSCounterCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void NUPSCounterCoProcessor::process(double step)
+void NUPSCounterCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void NUPSCounterCoProcessor::collectData(double step)
+void NUPSCounterCoProcessor::collectData(real step)
 {
     if (comm->getProcessID() == comm->getRoot()) {
-        double time   = timer.stop();
-        double nups_t = nup_t * (step - nupsStep) / time;
-        double nups   = nup * (step - nupsStep) / time;
-        double tnups  = nups / (double)numOfThreads;
+        real time   = timer.stop();
+        real nups_t = nup_t * (step - nupsStep) / time;
+        real nups   = nup * (step - nupsStep) / time;
+        real tnups  = nups / (real)numOfThreads;
         UBLOG(logINFO, "Calculation step = " << step);
         UBLOG(logINFO, "Total performance = " << nups_t << " NUPS");
         UBLOG(logINFO, "Performance per process = " << nups << " NUPS");
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
index ce6b16996824be9e614e131c6e05fad0d1a507fd..b178c97fe25647c7bec60883811a3263abc046bc 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
@@ -57,19 +57,19 @@ public:
     NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads, std::shared_ptr<vf::mpi::Communicator> comm);
     ~NUPSCounterCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for calculation of NUPS
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     UbTimer timer;
     int numOfThreads;
-    double numberOfNodes;
-    double numberOfBlocks;
-    double nup;
-    double nup_t;
-    double nupsStep;
+    real numberOfNodes;
+    real numberOfBlocks;
+    real nup;
+    real nup_t;
+    real nupsStep;
     std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
index ae385117c311eabfe2c5b98c8c2c45f4cd7473cd..4197c5cfe7e9d8f0f9da618ff58f4b421ae3d4fa 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
@@ -23,7 +23,7 @@ PressureCoefficientCoProcessor::PressureCoefficientCoProcessor(SPtr<Grid3D> grid
 //////////////////////////////////////////////////////////////////////////
 PressureCoefficientCoProcessor::~PressureCoefficientCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void PressureCoefficientCoProcessor::process(double step)
+void PressureCoefficientCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -31,7 +31,7 @@ void PressureCoefficientCoProcessor::process(double step)
     UBLOG(logDEBUG3, "D3Q27ForcesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void PressureCoefficientCoProcessor::collectData(double step)
+void PressureCoefficientCoProcessor::collectData(real step)
 {
     calculateRho();
 
@@ -42,10 +42,10 @@ void PressureCoefficientCoProcessor::collectData(double step)
 //////////////////////////////////////////////////////////////////////////
 void PressureCoefficientCoProcessor::calculateRho()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
+    std::vector<real> values;
+    std::vector<real> rvalues;
 
     for (SPtr<D3Q27Interactor> interactor : interactors) {
         typedef std::map<SPtr<Block3D>, std::set<std::vector<int>>> TransNodeIndicesMap;
@@ -60,7 +60,7 @@ void PressureCoefficientCoProcessor::calculateRho()
             UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
             //         UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
             UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-            double dx                 = grid->getDeltaX(block);
+            real dx                 = grid->getDeltaX(block);
 
             if (kernel->getCompressible()) {
                 calcMacros = &D3Q27System::calcCompMacroscopicValues;
@@ -89,9 +89,9 @@ void PressureCoefficientCoProcessor::calculateRho()
                         x1, x2,
                         x3)) // es kann sein, dass der node von einem anderen interactor z.B. als solid gemarkt wurde!!!
                 {
-                    double cx1 = val<1>(org) - val<1>(nodeOffset) + x1 * dx;
-                    double cx2 = val<2>(org) - val<2>(nodeOffset) + x2 * dx;
-                    double cx3 = val<3>(org) - val<3>(nodeOffset) + x3 * dx;
+                    real cx1 = val<1>(org) - val<1>(nodeOffset) + x1 * dx;
+                    real cx2 = val<2>(org) - val<2>(nodeOffset) + x2 * dx;
+                    real cx3 = val<3>(org) - val<3>(nodeOffset) + x3 * dx;
                     if (plane->isPointInGbObject3D(cx1, cx2, cx3)) {
                         distributions->getDistribution(f, x1, x2, x3);
                         calcMacros(f, rho, vx1, vx2, vx3);
@@ -172,7 +172,7 @@ void PressureCoefficientCoProcessor::writeValues(int step)
                 throw UbException(UB_EXARGS, "couldn't open file " + fname);
         }
 
-        out.write((char *)&outValues[0], outValues.size() * sizeof(double));
+        out.write((char *)&outValues[0], outValues.size() * sizeof(real));
 
         out.close();
 
@@ -193,7 +193,7 @@ void PressureCoefficientCoProcessor::readValues(int step)
         int length = (int)in.tellg();
         in.seekg(0, in.beg);
 
-        outValues.resize(length / sizeof(double));
+        outValues.resize(length / sizeof(real));
 
         in.read((char *)&outValues[0], length);
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
index 42927a7315d620e60c3af5c4285a89c18609cee7..26b8117aea007671bc1d6b17104f015cd62ddda3 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
@@ -22,13 +22,13 @@ public:
                                    const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~PressureCoefficientCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
     void addInteractor(SPtr<D3Q27Interactor> interactor);
     void readValues(int step);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void calculateRho();
     void writeValues(int step);
 
@@ -38,15 +38,15 @@ private:
     std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
     int numberOfSteps;
-    double maxStep;
+    real maxStep;
 
     std::vector<UbTupleFloat3> nodes;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
 
-    std::vector<double> outValues;
+    std::vector<real> outValues;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
index 74cd5a09c71b717f138090892b51b12a721f60ab..a486da249e5c2ce2eeaaf53fa4601d39bda689b0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
@@ -17,8 +17,8 @@
 
 PressureDifferenceCoProcessor::PressureDifferenceCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                              const std::string &path, SPtr<IntegrateValuesHelper> h1,
-                                                             SPtr<IntegrateValuesHelper> h2, LBMReal rhoReal,
-                                                             LBMReal uReal, LBMReal uLB, std::shared_ptr<vf::mpi::Communicator> comm)
+                                                             SPtr<IntegrateValuesHelper> h2, real rhoReal,
+                                                             real uReal, real uLB, std::shared_ptr<vf::mpi::Communicator> comm)
 
     : CoProcessor(grid, s), path(path), h1(h1), h2(h2), comm(comm)
 {
@@ -71,13 +71,13 @@ PressureDifferenceCoProcessor::PressureDifferenceCoProcessor(SPtr<Grid3D> grid,
 //////////////////////////////////////////////////////////////////////////
 PressureDifferenceCoProcessor::~PressureDifferenceCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void PressureDifferenceCoProcessor::process(double step)
+void PressureDifferenceCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void PressureDifferenceCoProcessor::collectData(double step)
+void PressureDifferenceCoProcessor::collectData(real step)
 {
     h1->calculateMQ();
     h2->calculateMQ();
@@ -85,13 +85,13 @@ void PressureDifferenceCoProcessor::collectData(double step)
     if (comm->getProcessID() == comm->getRoot()) {
         int istep = static_cast<int>(step);
         std::ofstream ostr;
-        double nn1  = h1->getNumberOfFluidsNodes();
-        double nn2  = h2->getNumberOfFluidsNodes();
-        double rho1 = h1->getRho();
-        double rho2 = h2->getRho();
-        double p1_1 = (rho1 / nn1) * factor1;
-        double p1_2 = (rho2 / nn2) * factor1;
-        double dp1  = p1_1 - p1_2;
+        real nn1  = h1->getNumberOfFluidsNodes();
+        real nn2  = h2->getNumberOfFluidsNodes();
+        real rho1 = h1->getRho();
+        real rho2 = h2->getRho();
+        real p1_1 = (rho1 / nn1) * factor1;
+        real p1_2 = (rho2 / nn2) * factor1;
+        real dp1  = p1_1 - p1_2;
 
         // double press1 = h1->getPress();
         // double press2 = h2->getPress();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
index 6de68a977904d5cc25ee37395eff4c9e66748eb4..09523552289297b78fb59b66e86e7ba84e1ed00b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
@@ -24,22 +24,22 @@ class PressureDifferenceCoProcessor : public CoProcessor
 {
 public:
     PressureDifferenceCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                  SPtr<IntegrateValuesHelper> h1, SPtr<IntegrateValuesHelper> h2, LBMReal rhoReal,
-                                  LBMReal uReal, LBMReal uLB,
+                                  SPtr<IntegrateValuesHelper> h1, SPtr<IntegrateValuesHelper> h2, real rhoReal,
+                                  real uReal, real uLB,
                                   /*const SPtr<LBMUnitConverter> conv,*/ std::shared_ptr<vf::mpi::Communicator> comm);
     ~PressureDifferenceCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     SPtr<IntegrateValuesHelper> h1, h2;
     std::string path;
     SPtr<LBMUnitConverter> conv;
-    void collectData(double step);
+    void collectData(real step);
     std::shared_ptr<vf::mpi::Communicator> comm;
-    LBMReal factor1; //= (1/3)*rhoReal*(uReal/uLB)^2 for calculation pReal = rhoLB * (1/3)*rhoReal*(uReal/uLB)^2,
+    real factor1; //= (1/3)*rhoReal*(uReal/uLB)^2 for calculation pReal = rhoLB * (1/3)*rhoReal*(uReal/uLB)^2,
                      //rhoReal and uReal in SI
-    LBMReal factor2; //= rhoReal*(uReal/uLB)^2       for calculation pReal = press * rhoReal*(uReal/uLB)^2, rhoReal and
+    real factor2; //= rhoReal*(uReal/uLB)^2       for calculation pReal = press * rhoReal*(uReal/uLB)^2, rhoReal and
                      //uReal in SI
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
index 1fbdb6f7f40a9b126cfa174d8cef7d7516ff884a..4e62a1c6bbb4c9f9a74968170c5821cc0f46fd23 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
@@ -32,7 +32,7 @@ void QCriterionCoProcessor::init()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void QCriterionCoProcessor::process(double step)
+void QCriterionCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -40,7 +40,7 @@ void QCriterionCoProcessor::process(double step)
     UBLOG(logDEBUG3, "QCriterionCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void QCriterionCoProcessor::collectData(double step)
+void QCriterionCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -92,7 +92,7 @@ void QCriterionCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //	UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -135,36 +135,36 @@ void QCriterionCoProcessor::addData(const SPtr<Block3D> block)
 
                     /////////////////////////////
                     // Geschwindigkeitsvektoren
-                    LBMReal vE[3];
-                    LBMReal vW[3];
-                    LBMReal vN[3];
-                    LBMReal vS[3];
-                    LBMReal vT[3];
-                    LBMReal vB[3];
+                    real vE[3];
+                    real vW[3];
+                    real vN[3];
+                    real vS[3];
+                    real vT[3];
+                    real vB[3];
                     // hole geschwindigkeiten an nachbarknoten
                     getNeighborVelocities(1, 0, 0, ix1, ix2, ix3, block, vE, vW);
                     getNeighborVelocities(0, 1, 0, ix1, ix2, ix3, block, vN, vS);
                     getNeighborVelocities(0, 0, 1, ix1, ix2, ix3, block, vT, vB);
                     //////////////////////////////////
                     // derivatives
-                    LBMReal duxdy = (vN[xdir] - vS[xdir]) * 0.5;
-                    LBMReal duydx = (vE[ydir] - vW[ydir]) * 0.5;
-                    LBMReal duxdz = (vT[xdir] - vB[xdir]) * 0.5;
-                    LBMReal duzdx = (vE[zdir] - vW[zdir]) * 0.5;
-                    LBMReal duydz = (vT[ydir] - vB[ydir]) * 0.5;
-                    LBMReal duzdy = (vN[zdir] - vS[zdir]) * 0.5;
-
-                    LBMReal duxdx = (vE[xdir] - vW[xdir]) * 0.5;
-                    LBMReal duydy = (vN[ydir] - vS[ydir]) * 0.5;
-                    LBMReal duzdz = (vT[zdir] - vB[zdir]) * 0.5;
-
-                    LBMReal scaleFactor =
-                        (double)(1
+                    real duxdy = (vN[xdir] - vS[xdir]) * 0.5;
+                    real duydx = (vE[ydir] - vW[ydir]) * 0.5;
+                    real duxdz = (vT[xdir] - vB[xdir]) * 0.5;
+                    real duzdx = (vE[zdir] - vW[zdir]) * 0.5;
+                    real duydz = (vT[ydir] - vB[ydir]) * 0.5;
+                    real duzdy = (vN[zdir] - vS[zdir]) * 0.5;
+
+                    real duxdx = (vE[xdir] - vW[xdir]) * 0.5;
+                    real duydy = (vN[ydir] - vS[ydir]) * 0.5;
+                    real duzdz = (vT[zdir] - vB[zdir]) * 0.5;
+
+                    real scaleFactor =
+                        (real)(1
                                  << (currentLevel -
                                      minInitLevel)); // pow(2.0,(double)(currentLevel-minInitLevel));//finer grid ->
                                                      // current level higher. coarsest grid: currentLevel=minInitLevel=0
                     // Q=-0.5*(S_ij S_ij - Omega_ij Omega_ij) => regions where vorticity is larger than strain rate
-                    LBMReal q = -(duxdy * duydx + duxdz * duzdx + duydz * duzdy + duxdx * duxdx + duydy * duydy +
+                    real q = -(duxdy * duydx + duxdz * duzdx + duydz * duzdy + duxdx * duxdx + duydy * duydy +
                                   duzdz * duzdz) *
                                 scaleFactor;
 
@@ -201,7 +201,7 @@ void QCriterionCoProcessor::addData(const SPtr<Block3D> block)
 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz, int ix1, int ix2, int ix3,
-                                                  const SPtr<Block3D> block, LBMReal *vE, LBMReal *vW)
+                                                  const SPtr<Block3D> block, real *vE, real *vW)
 {
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
@@ -234,9 +234,9 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
     if ((ix1 == 0 && offx == 1) || (ix2 == 0 && offy == 1) || (ix3 == 0 && offz == 1)) {
         int RankNeighborW;
         Vector3D orgNodeRW = grid->getNodeCoordinates(block, ix1, ix2, ix3);
-        double xp000       = orgNodeRW[0];
-        double yp000       = orgNodeRW[1];
-        double zp000       = orgNodeRW[2];
+        real xp000       = orgNodeRW[0];
+        real yp000       = orgNodeRW[1];
+        real zp000       = orgNodeRW[2];
 
         int currentLevel         = block->getLevel();
         UbTupleInt3 blockIndexes = grid->getBlockIndexes(xp000, yp000, zp000, currentLevel);
@@ -282,12 +282,12 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
             SPtr<ILBMKernel> kernelW                 = blockNeighW->getKernel();
             SPtr<BCArray3D> bcArrayW                 = kernelW->getBCProcessor()->getBCArray();
             SPtr<DistributionArray3D> distributionsW = kernelW->getDataSet()->getFdistributions();
-            LBMReal fW2[27];
-            LBMReal fW[27];
-            LBMReal f0[27];
-            LBMReal fE[27];
-            LBMReal v0[3];
-            LBMReal vW2[3];
+            real fW2[27];
+            real fW[27];
+            real f0[27];
+            real fE[27];
+            real v0[3];
+            real vW2[3];
             // distributionsW->getDistribution(fW2, std::max(ix1+2*offx,1), std::max(ix2+2*offy,1),
             // std::max(ix3+2*offz,1)); distributionsW->getDistribution(fW, std::max(ix1+offx,1), std::max(ix2+offy,1),
             // std::max(ix3+offz,1)); distributionsW->getDistribution(f0, std::max(ix1    ,1), std::max(ix2    ,1),
@@ -314,7 +314,7 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
             SPtr<ILBMKernel> kernelW                 = blockNeighW->getKernel();
             SPtr<BCArray3D> bcArrayW                 = kernelW->getBCProcessor()->getBCArray();
             SPtr<DistributionArray3D> distributionsW = kernelW->getDataSet()->getFdistributions();
-            LBMReal fW[27];
+            real fW[27];
 
             if (offx == 1) {
                 distributionsW->getDistribution(fW, (distributions->getNX1()) - 1, ix2,
@@ -330,20 +330,20 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
 
     } else {
         // data available in current block:
-        LBMReal fW[27];
+        real fW[27];
         distributions->getDistribution(fW, ix1 - offx, ix2 - offy, ix3 - offz);
         computeVelocity(fW, vW, compressible);
     }
     if (checkInterpolation) {
         // in plus-direction data is available in current block because of ghost layers
-        LBMReal fE[27];
+        real fE[27];
         distributions->getDistribution(fE, ix1 + offx, ix2 + offy, ix3 + offz); // E:= plus 1
         computeVelocity(fE, vE, compressible);
     }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void QCriterionCoProcessor::computeVelocity(LBMReal *f, LBMReal *v, bool compressible)
+void QCriterionCoProcessor::computeVelocity(real *f, real *v, bool compressible)
 {
     //////////////////////////////////////////////////////////////////////////
     // compute x,y,z-velocity components from distribution
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
index 55f0df5a2e8aaaf933babb70d6b9c5246424c34c..38cd47fb890ffc79bb3f43ecc17bbe42885fa114 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
@@ -31,27 +31,27 @@ public:
     QCriterionCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer, SPtr<UbScheduler> s,
                           std::shared_ptr<vf::mpi::Communicator> comm);
     //! Make update if timestep is write-timestep specified in SPtr<UbScheduler> s
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! Q is computed for all points in a block. Data for writing is added to data and cell vectors.
     void addData(const SPtr<Block3D> block);
     //! After writing to .vtk-file, all vectors are reset
     void clearData();
     //! Computes macroscopic velocities
-    void computeVelocity(LBMReal *f, LBMReal *v, bool compressible);
+    void computeVelocity(real *f, real *v, bool compressible);
     //! Computes average and RMS values of macroscopic quantities
     void getNeighborVelocities(int offx, int offy, int offz, int ix1, int ix2, int ix3, const SPtr<Block3D> block,
-                               LBMReal *vE, LBMReal *vW);
+                               real *vE, real *vW);
 
 private:
     void init();
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames; // only one entry for QKrit-CoProcessor: Q
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     int minInitLevel; // go through all levels for block vector of current process from minInitLevel to maxInitLevel
     int maxInitLevel;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
index 64ecc177ff38403f346a519e8d0a5515a12713e4..cd1f9c54cb50585b572a61cdc7d8c884386b864c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
@@ -38,7 +38,7 @@ ShearStressCoProcessor::ShearStressCoProcessor(SPtr<Grid3D> grid, const std::str
 //////////////////////////////////////////////////////////////////////////
 ShearStressCoProcessor::~ShearStressCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::process(double step)
+void ShearStressCoProcessor::process(real step)
 {
     if (step == 0) {
         initDistance();
@@ -49,7 +49,7 @@ void ShearStressCoProcessor::process(double step)
     UBLOG(logDEBUG3, "D3Q27ShearStressCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::collectData(double step)
+void ShearStressCoProcessor::collectData(real step)
 {
     using namespace std;
 
@@ -122,12 +122,13 @@ void ShearStressCoProcessor::clearData()
     data.clear();
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::calculateShearStress(double timeStep)
+void ShearStressCoProcessor::calculateShearStress(real timeStep)
 {
+    using namespace vf::lbm::dir;
     using namespace D3Q27System;
 
-    LBMReal f[27];
-    LBMReal vx, vy, vz, sxx, syy, szz, sxy, syz, sxz;
+    real f[27];
+    real vx, vy, vz, sxx, syy, szz, sxy, syz, sxz;
 
     for (SPtr<D3Q27Interactor> interactor : interactors) {
         typedef std::map<SPtr<Block3D>, std::set<std::vector<int>>> TransNodeIndicesMap;
@@ -141,7 +142,7 @@ void ShearStressCoProcessor::calculateShearStress(double timeStep)
             SPtr<ShearStressValuesArray3D> ssv      = kernel->getDataSet()->getShearStressValues();
 
             int ghostLayer     = kernel->getGhostLayerWidth();
-            LBMReal collFactor = kernel->getCollisionFactor();
+            real collFactor = kernel->getCollisionFactor();
 
             int minX1 = ghostLayer;
             int maxX1 = (int)bcArray->getNX1() - 1 - ghostLayer;
@@ -160,8 +161,8 @@ void ShearStressCoProcessor::calculateShearStress(double timeStep)
                     continue;
 
                 if (bcArray->isFluid(ix1, ix2, ix3)) {
-                    double q        = (*ssv)(normalq, ix1, ix2, ix3);
-                    double numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
+                    real q        = (*ssv)(normalq, ix1, ix2, ix3);
+                    real numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
                     if (q == 0 || numPoint != 3)
                         continue;
                     // if (q==0)continue;
@@ -193,11 +194,11 @@ void ShearStressCoProcessor::calculateShearStress(double timeStep)
                           (((f[DIR_PPP] + f[DIR_MMM]) - (f[DIR_PMP] + f[DIR_MPM])) + ((f[DIR_PMM] + f[DIR_MPP]) - (f[DIR_MMP] + f[DIR_PPM])) +
                            (-(f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM])) - vy * vz);
 
-                    LBMReal dxxMyy = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
+                    real dxxMyy = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
                                      (((f[DIR_P0P] + f[DIR_M0M]) + (f[DIR_P0M] + f[DIR_M0P])) - ((f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM])) +
                                       ((f[DIR_P00] + f[DIR_M00]) - (f[DIR_0P0] + f[DIR_0M0])) - vx * vx + vy * vy);
 
-                    LBMReal dxxMzz = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
+                    real dxxMzz = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
                                      ((((f[DIR_PP0] + f[DIR_MM0]) + (f[DIR_PM0] + f[DIR_MP0])) - ((f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM]))) +
                                       ((f[DIR_P00] + f[DIR_M00]) - (f[DIR_00P] + f[DIR_00M])) - vx * vx + vz * vz);
 
@@ -249,7 +250,7 @@ void ShearStressCoProcessor::addData()
             UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
             //         UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
             UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-            double dx                 = grid->getDeltaX(block);
+            real dx                 = grid->getDeltaX(block);
 
             SPtr<ILBMKernel> kernel                 = block->getKernel();
             SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
@@ -257,7 +258,7 @@ void ShearStressCoProcessor::addData()
             SPtr<ShearStressValuesArray3D> ssv      = kernel->getDataSet()->getShearStressValues();
 
             int ghostLayer     = kernel->getGhostLayerWidth();
-            LBMReal collFactor = kernel->getCollisionFactor();
+            real collFactor = kernel->getCollisionFactor();
 
             int minX1 = ghostLayer;
             int maxX1 = (int)bcArray->getNX1() - 1 - ghostLayer;
@@ -281,8 +282,8 @@ void ShearStressCoProcessor::addData()
                     continue;
 
                 if (bcArray->isFluid(ix1, ix2, ix3)) {
-                    double q        = (*ssv)(normalq, ix1, ix2, ix3);
-                    double numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
+                    real q        = (*ssv)(normalq, ix1, ix2, ix3);
+                    real numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
                     if (q == 0 || numPoint != 3)
                         continue;
                     // if (q==0)continue;
@@ -293,7 +294,7 @@ void ShearStressCoProcessor::addData()
                                                 float(val<3>(org) - val<3>(nodeOffset) + ix3 * dx)));
 
                     //////get normal and distance//////
-                    double A, B, C;
+                    real A, B, C;
                     A = (*ssv)(normalX1, ix1, ix2, ix3);
                     B = (*ssv)(normalX2, ix1, ix2, ix3);
                     C = (*ssv)(normalX3, ix1, ix2, ix3);
@@ -306,35 +307,35 @@ void ShearStressCoProcessor::addData()
                     // vtySonja = (*av)(ix1,ix2,ix3,AvVy)-normals[1]*temp;
                     // vtzSonja = (*av)(ix1,ix2,ix3,AvVz)-normals[2]*temp;
 
-                    double vtx = (B * B * (*ssv)(AvVx, ix1, ix2, ix3) + C * C * (*ssv)(AvVx, ix1, ix2, ix3) -
+                    real vtx = (B * B * (*ssv)(AvVx, ix1, ix2, ix3) + C * C * (*ssv)(AvVx, ix1, ix2, ix3) -
                                   A * B * (*ssv)(AvVy, ix1, ix2, ix3) - A * C * (*ssv)(AvVy, ix1, ix2, ix3)) /
                                  (A * A + B * B + C * C);
-                    double vty = (-(A * B * (*ssv)(AvVx, ix1, ix2, ix3)) + A * A * (*ssv)(AvVy, ix1, ix2, ix3) +
+                    real vty = (-(A * B * (*ssv)(AvVx, ix1, ix2, ix3)) + A * A * (*ssv)(AvVy, ix1, ix2, ix3) +
                                   C * C * (*ssv)(AvVy, ix1, ix2, ix3) - B * C * (*ssv)(AvVz, ix1, ix2, ix3)) /
                                  (A * A + B * B + C * C);
-                    double vtz = (-(A * C * (*ssv)(AvVx, ix1, ix2, ix3)) - B * C * (*ssv)(AvVy, ix1, ix2, ix3) +
+                    real vtz = (-(A * C * (*ssv)(AvVx, ix1, ix2, ix3)) - B * C * (*ssv)(AvVy, ix1, ix2, ix3) +
                                   A * A * (*ssv)(AvVz, ix1, ix2, ix3) + B * B * (*ssv)(AvVz, ix1, ix2, ix3)) /
                                  (A * A + B * B + C * C);
 
-                    double normVt = sqrt(vtx * vtx + vty * vty + vtz * vtz) + 1e-100;
-                    double nvtx   = vtx / normVt;
-                    double nvty   = vty / normVt;
-                    double nvtz   = vtz / normVt;
+                    real normVt = sqrt(vtx * vtx + vty * vty + vtz * vtz) + 1e-100;
+                    real nvtx   = vtx / normVt;
+                    real nvty   = vty / normVt;
+                    real nvtz   = vtz / normVt;
 
-                    double sx   = 0.5 * ((*ssv)(AvSxx, ix1, ix2, ix3) * nvtx + (*ssv)(AvSxy, ix1, ix2, ix3) * nvty +
+                    real sx   = 0.5 * ((*ssv)(AvSxx, ix1, ix2, ix3) * nvtx + (*ssv)(AvSxy, ix1, ix2, ix3) * nvty +
                                        (*ssv)(AvSxz, ix1, ix2, ix3) * nvtz);
-                    double sy   = 0.5 * ((*ssv)(AvSxy, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyy, ix1, ix2, ix3) * nvty +
+                    real sy   = 0.5 * ((*ssv)(AvSxy, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyy, ix1, ix2, ix3) * nvty +
                                        (*ssv)(AvSyz, ix1, ix2, ix3) * nvtz);
-                    double sz   = 0.5 * ((*ssv)(AvSxz, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyz, ix1, ix2, ix3) * nvty +
+                    real sz   = 0.5 * ((*ssv)(AvSxz, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyz, ix1, ix2, ix3) * nvty +
                                        (*ssv)(AvSzz, ix1, ix2, ix3) * nvtz);
-                    double sabs = sqrt(sx * sx + sy * sy + sz * sz);
+                    real sabs = sqrt(sx * sx + sy * sy + sz * sz);
 
-                    double viscosity = (1.0 / 3.0) * (1.0 / collFactor - 0.5);
-                    double rho       = 1.0;
-                    double utau      = sqrt(viscosity / rho * sabs);
+                    real viscosity = (1.0 / 3.0) * (1.0 / collFactor - 0.5);
+                    real rho       = 1.0;
+                    real utau      = sqrt(viscosity / rho * sabs);
 
                     // double q=(*av)(ix1,ix2,ix3,normalq) ;
-                    double yPlus = (utau * q) / viscosity;
+                    real yPlus = (utau * q) / viscosity;
 
                     data[index++].push_back(yPlus);
                     data[index++].push_back(utau);
@@ -344,7 +345,7 @@ void ShearStressCoProcessor::addData()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::reset(double step)
+void ShearStressCoProcessor::reset(real step)
 {
     if (Resetscheduler->isDue(step))
         resetData(step);
@@ -352,7 +353,7 @@ void ShearStressCoProcessor::reset(double step)
     UBLOG(logDEBUG3, "resetCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::resetData(double /*step*/)
+void ShearStressCoProcessor::resetData(real /*step*/)
 {
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         for (const auto &block : blockVector[level]) {
@@ -404,14 +405,16 @@ void ShearStressCoProcessor::resetData(double /*step*/)
 //////////////////////////////////////////////////////////////////////////
 void ShearStressCoProcessor::addInteractor(SPtr<D3Q27Interactor> interactor) { interactors.push_back(interactor); }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, double &A,
-                                       double &B, double &C, double &D, double &ii)
+void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, real &A,
+                                       real &B, real &C, real &D, real &ii)
 {
-    double x1plane = 0.0, y1plane = 0.0, z1plane = 0.0;
-    double x2plane = 0.0, y2plane = 0.0, z2plane = 0.0;
-    double x3plane = 0.0, y3plane = 0.0, z3plane = 0.0;
+    using namespace vf::lbm::dir;
+
+    real x1plane = 0.0, y1plane = 0.0, z1plane = 0.0;
+    real x2plane = 0.0, y2plane = 0.0, z2plane = 0.0;
+    real x3plane = 0.0, y3plane = 0.0, z3plane = 0.0;
     SPtr<BoundaryConditions> bcPtr;
-    double dx                               = grid->getDeltaX(block);
+    real dx                               = grid->getDeltaX(block);
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
@@ -562,32 +565,32 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 "ix2=" + UbSystem::toString(ix2) + "ix3=" + UbSystem::toString(ix3) +
                                                 "GlobalID=" + UbSystem::toString(block->getGlobalID()) +
                                                 "dx=" + UbSystem::toString(dx) +
-                                                "T=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00P)) +
-                                                "B=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00M)) +
-                                                "E=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P00)) +
-                                                "W=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M00)) +
-                                                "N=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0P0)) +
-                                                "S=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0M0)) +
-                                                "NE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PP0)) +
-                                                "SW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MM0)) +
-                                                "SE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PM0)) +
-                                                "NW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MP0)) +
-                                                "TE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0P)) +
-                                                "BW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0M)) +
-                                                "BE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0M)) +
-                                                "TW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0P)) +
-                                                "TN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PP)) +
-                                                "BS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MM)) +
-                                                "BN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PM)) +
-                                                "TS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MP)) +
-                                                "TNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPP)) +
-                                                "TNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPP)) +
-                                                "TSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMP)) +
-                                                "TSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMP)) +
-                                                "BNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPM)) +
-                                                "BNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPM)) +
-                                                "BSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMM)) +
-                                                "BSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMM) * dx)));
+                                                "T=" + UbSystem::toString(bcPtr->getQ(DIR_00P)) +
+                                                "B=" + UbSystem::toString(bcPtr->getQ(DIR_00M)) +
+                                                "E=" + UbSystem::toString(bcPtr->getQ(DIR_P00)) +
+                                                "W=" + UbSystem::toString(bcPtr->getQ(DIR_M00)) +
+                                                "N=" + UbSystem::toString(bcPtr->getQ(DIR_0P0)) +
+                                                "S=" + UbSystem::toString(bcPtr->getQ(DIR_0M0)) +
+                                                "NE=" + UbSystem::toString(bcPtr->getQ(DIR_PP0)) +
+                                                "SW=" + UbSystem::toString(bcPtr->getQ(DIR_MM0)) +
+                                                "SE=" + UbSystem::toString(bcPtr->getQ(DIR_PM0)) +
+                                                "NW=" + UbSystem::toString(bcPtr->getQ(DIR_MP0)) +
+                                                "TE=" + UbSystem::toString(bcPtr->getQ(DIR_P0P)) +
+                                                "BW=" + UbSystem::toString(bcPtr->getQ(DIR_M0M)) +
+                                                "BE=" + UbSystem::toString(bcPtr->getQ(DIR_P0M)) +
+                                                "TW=" + UbSystem::toString(bcPtr->getQ(DIR_M0P)) +
+                                                "TN=" + UbSystem::toString(bcPtr->getQ(DIR_0PP)) +
+                                                "BS=" + UbSystem::toString(bcPtr->getQ(DIR_0MM)) +
+                                                "BN=" + UbSystem::toString(bcPtr->getQ(DIR_0PM)) +
+                                                "TS=" + UbSystem::toString(bcPtr->getQ(DIR_0MP)) +
+                                                "TNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPP)) +
+                                                "TNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPP)) +
+                                                "TSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMP)) +
+                                                "TSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMP)) +
+                                                "BNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPM)) +
+                                                "BNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPM)) +
+                                                "BSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMM)) +
+                                                "BSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMM) * dx)));
         }
     }
 
@@ -597,18 +600,18 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                 for (int k = z; k <= z + 1; k++) {
                     Vector3D pointplane1 = grid->getNodeCoordinates(block, i, j, k);
 
-                    double iph = pointplane1[0];
-                    double jph = pointplane1[1];
-                    double kph = pointplane1[2];
+                    real iph = pointplane1[0];
+                    real jph = pointplane1[1];
+                    real kph = pointplane1[2];
 
                     if (!bcArray->isSolid(i, j, k)) {
                         SPtr<BoundaryConditions> bcPtrIn = bcArray->getBC(i, j, k);
                         if (bcPtrIn) {
                             for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
                                 if (ii <= 2) {
-                                    LBMReal q = bcPtrIn->getQ(fdir);
+                                    real q = bcPtrIn->getQ(fdir);
                                     if (q != 999.00000) {
-                                        if (fdir == D3Q27System::DIR_P00) {
+                                        if (fdir == DIR_P00) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (i + q <= x + 1) {
                                                 if (ii == 0) {
@@ -634,7 +637,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_M00) {
+                                        if (fdir == DIR_M00) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (i - q >= x) {
                                                 if (ii == 0) {
@@ -660,7 +663,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_0P0) {
+                                        if (fdir == DIR_0P0) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (j + q <= y + 1) {
                                                 if (ii == 0) {
@@ -686,7 +689,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_0M0) {
+                                        if (fdir == DIR_0M0) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (j - q >= y) {
                                                 if (ii == 0) {
@@ -713,7 +716,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                             }
                                         }
 
-                                        if (fdir == D3Q27System::DIR_00P) {
+                                        if (fdir == DIR_00P) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (k + q <= z + 1) {
                                                 if (ii == 0) {
@@ -739,7 +742,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_00M) {
+                                        if (fdir == DIR_00M) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (k - q >= z) {
                                                 if (ii == 0) {
@@ -788,32 +791,32 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                     UB_EXARGS, "ii is=" + UbSystem::toString(ii) + "  ix1=" + UbSystem::toString(ix1) +
                                    " ix2=" + UbSystem::toString(ix2) + " ix3=" + UbSystem::toString(ix3) +
                                    " Block3D::GlobalID=" + UbSystem::toString(block->getGlobalID()) + " dx=" +
-                                   UbSystem::toString(dx) + " T=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00P)) +
-                                   " B=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00M)) +
-                                   " E=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P00)) +
-                                   " W=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M00)) +
-                                   " N=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0P0)) +
-                                   " S=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0M0)) +
-                                   " NE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PP0)) +
-                                   " SW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MM0)) +
-                                   " SE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PM0)) +
-                                   " NW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MP0)) +
-                                   " TE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0P)) +
-                                   " BW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0M)) +
-                                   " BE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0M)) +
-                                   " TW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0P)) +
-                                   " TN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PP)) +
-                                   " BS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MM)) +
-                                   " BN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PM)) +
-                                   " TS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MP)) +
-                                   " TNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPP)) +
-                                   " TNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPP)) +
-                                   " TSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMP)) +
-                                   " TSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMP)) +
-                                   " BNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPM)) +
-                                   " BNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPM)) +
-                                   " BSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMM)) +
-                                   " BSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMM))));
+                                   UbSystem::toString(dx) + " T=" + UbSystem::toString(bcPtr->getQ(DIR_00P)) +
+                                   " B=" + UbSystem::toString(bcPtr->getQ(DIR_00M)) +
+                                   " E=" + UbSystem::toString(bcPtr->getQ(DIR_P00)) +
+                                   " W=" + UbSystem::toString(bcPtr->getQ(DIR_M00)) +
+                                   " N=" + UbSystem::toString(bcPtr->getQ(DIR_0P0)) +
+                                   " S=" + UbSystem::toString(bcPtr->getQ(DIR_0M0)) +
+                                   " NE=" + UbSystem::toString(bcPtr->getQ(DIR_PP0)) +
+                                   " SW=" + UbSystem::toString(bcPtr->getQ(DIR_MM0)) +
+                                   " SE=" + UbSystem::toString(bcPtr->getQ(DIR_PM0)) +
+                                   " NW=" + UbSystem::toString(bcPtr->getQ(DIR_MP0)) +
+                                   " TE=" + UbSystem::toString(bcPtr->getQ(DIR_P0P)) +
+                                   " BW=" + UbSystem::toString(bcPtr->getQ(DIR_M0M)) +
+                                   " BE=" + UbSystem::toString(bcPtr->getQ(DIR_P0M)) +
+                                   " TW=" + UbSystem::toString(bcPtr->getQ(DIR_M0P)) +
+                                   " TN=" + UbSystem::toString(bcPtr->getQ(DIR_0PP)) +
+                                   " BS=" + UbSystem::toString(bcPtr->getQ(DIR_0MM)) +
+                                   " BN=" + UbSystem::toString(bcPtr->getQ(DIR_0PM)) +
+                                   " TS=" + UbSystem::toString(bcPtr->getQ(DIR_0MP)) +
+                                   " TNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPP)) +
+                                   " TNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPP)) +
+                                   " TSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMP)) +
+                                   " TSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMP)) +
+                                   " BNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPM)) +
+                                   " BNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPM)) +
+                                   " BSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMM)) +
+                                   " BSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMM))));
             }
         }
     }
@@ -835,6 +838,8 @@ bool ShearStressCoProcessor::checkUndefindedNodes(SPtr<BCArray3D> bcArray, int i
 //////////////////////////////////////////////////////////////////////////////////////
 void ShearStressCoProcessor::initDistance()
 {
+    using namespace vf::lbm::dir;
+
     for (const auto &interactor : interactors) {
         //      typedef std::map<SPtr<Block3D>, std::set< std::vector<int> > > TransNodeIndicesMap;
         for (const auto &t : interactor->getBcNodeIndicesMap()) {
@@ -852,7 +857,7 @@ void ShearStressCoProcessor::initDistance()
             SPtr<ShearStressValuesArray3D> ssv      = kernel->getDataSet()->getShearStressValues();
 
             int ghostLayer = kernel->getGhostLayerWidth();
-            //         LBMReal collFactor = kernel->getCollisionFactor();
+            //         real collFactor = kernel->getCollisionFactor();
 
             int minX1 = ghostLayer;
             int maxX1 = (int)bcArray->getNX1() - 1 - ghostLayer;
@@ -876,22 +881,22 @@ void ShearStressCoProcessor::initDistance()
                         continue;
                     int numberOfCorner = 0;
 
-                    if (bc->getQ(D3Q27System::DIR_00P) != 999.000) {
+                    if (bc->getQ(DIR_00P) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_00M) != 999.000) {
+                    if (bc->getQ(DIR_00M) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_P00) != 999.000) {
+                    if (bc->getQ(DIR_P00) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_M00) != 999.000) {
+                    if (bc->getQ(DIR_M00) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_0P0) != 999.000) {
+                    if (bc->getQ(DIR_0P0) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_0M0) != 999.000) {
+                    if (bc->getQ(DIR_0M0) != 999.000) {
                         numberOfCorner++;
                     }
                     // if(bc->hasVelocityBoundary()||bc->hasDensityBoundary())continue;
@@ -901,17 +906,17 @@ void ShearStressCoProcessor::initDistance()
                         continue;
 
                     //////get normal and distance//////
-                    double A, B, C, D, ii = 0.0;
+                    real A, B, C, D, ii = 0.0;
                     findPlane(ix1, ix2, ix3, grid, block, A, B, C, D, ii);
                     Vector3D pointplane1 = grid->getNodeCoordinates(block, ix1, ix2, ix3);
-                    double ix1ph         = pointplane1[0];
-                    double ix2ph         = pointplane1[1];
-                    double ix3ph         = pointplane1[2];
-                    double normalDis;
+                    real ix1ph         = pointplane1[0];
+                    real ix2ph         = pointplane1[1];
+                    real ix3ph         = pointplane1[2];
+                    real normalDis;
                     if (ii != 3) {
                         UB_THROW(UbException(UB_EXARGS, "not enough points to create plane" + UbSystem::toString(ii)));
                     } else {
-                        double s = A * ix1ph + B * ix2ph + C * ix3ph +
+                        real s = A * ix1ph + B * ix2ph + C * ix3ph +
                                    D; // The sign of s = Ax + By + Cz + D determines which side the point (x,y,z) lies
                                       // with respect to the plane. If s > 0 then the point lies on the same side as the
                                       // normal (A,B,C). If s < 0 then it lies on the opposite side, if s = 0 then the
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h
index 995589b9e8b84334ea108cddac8e49bbbfa1c535..73fd42d6485321a26e11b2cf0b4b2a521a0881fd 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h
@@ -31,33 +31,33 @@ public:
                            SPtr<UbScheduler> rs);
     ~ShearStressCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
     void addInteractor(SPtr<D3Q27Interactor> interactor);
 
 protected:
     //! Computes average and shear stress values of macroscopic quantities
-    void calculateShearStress(double timeStep);
+    void calculateShearStress(real timeStep);
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! Reset data
-    void resetData(double step);
+    void resetData(real step);
     //! prepare data
     void addData();
     void clearData();
-    void reset(double step);
-    void findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, double &A, double &B, double &C,
-                   double &D, double &ii);
+    void reset(real step);
+    void findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, real &A, real &B, real &C,
+                   real &D, real &ii);
     bool checkUndefindedNodes(SPtr<BCArray3D> bcArray, int ix1, int ix2, int ix3);
     void initDistance();
 
 private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     std::vector<SPtr<D3Q27Interactor>> interactors;
-    std::vector<double> normals;
+    std::vector<real> normals;
     int gridRank;
     WbWriter *writer;
     SPtr<UbScheduler> Resetscheduler; // additional scheduler to restart averaging after a given interval
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
index d2874876ee9b36b9a17a6c4dcf88c4c7d0e948cb..8fa95c121ee61f419d778a636cacbb129ecdfe9e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
@@ -27,8 +27,8 @@ TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid,
 TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path,
                                                              WbWriter *const writer, SPtr<UbScheduler> s,
                                                              std::shared_ptr<vf::mpi::Communicator> comm, int options,
-                                                             std::vector<int> levels, std::vector<double> &levelCoords,
-                                                             std::vector<double> &bounds, bool timeAveraging)
+                                                             std::vector<int> levels, std::vector<real> &levelCoords,
+                                                             std::vector<real> &bounds, bool timeAveraging)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm), options(options), levels(levels),
       levelCoords(levelCoords), bounds(bounds), timeAveraging(timeAveraging)
 {
@@ -59,8 +59,8 @@ void TimeAveragedValuesCoProcessor::init()
         calcMacros = &calcIncompMacroscopicValues;
     }
 
-    double begin        = scheduler->getMinBegin();
-    double gridTimeStep = grid->getTimeStep();
+    real begin        = scheduler->getMinBegin();
+    real gridTimeStep = grid->getTimeStep();
 
     if (gridTimeStep == begin || gridTimeStep == 0) {
         initData();
@@ -116,7 +116,7 @@ void TimeAveragedValuesCoProcessor::initData()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::process(double step)
+void TimeAveragedValuesCoProcessor::process(real step)
 {
     if (step == minStep) {
         initData();
@@ -149,7 +149,7 @@ void TimeAveragedValuesCoProcessor::process(double step)
     UBLOG(logDEBUG3, "AverageValuesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::collectData(double step)
+void TimeAveragedValuesCoProcessor::collectData(real step)
 {
     int istep = int(step);
 
@@ -195,7 +195,7 @@ void TimeAveragedValuesCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //   UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
     int level                 = block->getLevel();
 
     // Diese Daten werden geschrieben:
@@ -267,8 +267,8 @@ void TimeAveragedValuesCoProcessor::addData(const SPtr<Block3D> block)
     maxX2 -= 2;
     maxX3 -= 2;
 
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // D3Q27BoundaryConditionPtr bcPtr;
 
@@ -352,7 +352,7 @@ void TimeAveragedValuesCoProcessor::addData(const SPtr<Block3D> block)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::calculateAverageValues(double timeSteps)
+void TimeAveragedValuesCoProcessor::calculateAverageValues(real timeSteps)
 {
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         int i;
@@ -384,7 +384,7 @@ void TimeAveragedValuesCoProcessor::calculateAverageValues(double timeSteps)
                 maxX2 -= 2;
                 maxX3 -= 2;
 
-                LBMReal rho {0.}, ux {0.}, uy {0.}, uz {0.}, uxx {0.}, uzz {0.}, uyy {0.}, uxy {0.}, uxz {0.}, uyz {0.}, rhof {0.};
+                real rho {0.}, ux {0.}, uy {0.}, uz {0.}, uxx {0.}, uzz {0.}, uyy {0.}, uxy {0.}, uxz {0.}, uyz {0.}, rhof {0.};
 
                 for (int ix3 = minX3; ix3 <= maxX3; ix3++) {
                     for (int ix2 = minX2; ix2 <= maxX2; ix2++) {
@@ -463,14 +463,14 @@ void TimeAveragedValuesCoProcessor::calculateAverageValues(double timeSteps)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::calculateSubtotal(double step)
+void TimeAveragedValuesCoProcessor::calculateSubtotal(real step)
 {
     if (scheduler->isDue(step)) {
 
         // DEBUG/////////////////////
         // UBLOG(logINFO, "calculateSubtotal::step = " << step);
         ////////////////////////////
-        LBMReal f[27];
+        real f[27];
 
         //#ifdef _OPENMP
         //#pragma omp parallel private (f)
@@ -518,7 +518,7 @@ void TimeAveragedValuesCoProcessor::calculateSubtotal(double step)
                                         //////////////////////////////////////////////////////////////////////////
                                         // compute velocity
                                         //////////////////////////////////////////////////////////////////////////
-                                        LBMReal vx, vy, vz, rho;
+                                        real vx, vy, vz, rho;
                                         calcMacros(f, rho, vx, vy, vz);
                                         // double press = D3Q27System::calcPress(f, rho, vx, vy, vz);
 
@@ -574,7 +574,7 @@ void TimeAveragedValuesCoProcessor::calculateSubtotal(double step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::planarAverage(double step)
+void TimeAveragedValuesCoProcessor::planarAverage(real step)
 {
     std::ofstream ostr;
 
@@ -625,11 +625,11 @@ void TimeAveragedValuesCoProcessor::planarAverage(double step)
 
     for (int i = 0; i < size; i++) {
         int level    = levels[i];
-        double dx    = grid->getDeltaX(level);
-        double start = levelCoords[k];
-        double stop  = levelCoords[k + 1];
+        real dx    = grid->getDeltaX(level);
+        real start = levelCoords[k];
+        real stop  = levelCoords[k + 1];
 
-        for (double j = start; j < stop; j += dx) {
+        for (real j = start; j < stop; j += dx) {
             IntegrateValuesHelper intValHelp(grid, comm, bounds[0], bounds[1], j, bounds[3], bounds[4], j + dx, level);
 
             std::vector<IntegrateValuesHelper::CalcNodes> cnodes = intValHelp.getCNodes();
@@ -640,46 +640,46 @@ void TimeAveragedValuesCoProcessor::planarAverage(double step)
             calculateAverageValuesForPlane(cnodes);
 
             if (root) {
-                double numberOfFluidsNodes = intValHelp.getNumberOfFluidsNodes();
+                real numberOfFluidsNodes = intValHelp.getNumberOfFluidsNodes();
                 if (numberOfFluidsNodes > 0) {
                     ostr << j + 0.5 * dx << std::setprecision(15);
 
                     // mean density
                     if ((options & Density) == Density) {
-                        double rho  = saRho / numberOfFluidsNodes;
-                        double rhoF = saRhoF / numberOfFluidsNodes;
+                        real rho  = saRho / numberOfFluidsNodes;
+                        real rhoF = saRhoF / numberOfFluidsNodes;
                         ostr << ";" << rho << ";" << rhoF;
                     }
 
                     // mean velocity
                     if ((options & Velocity) == Velocity) {
-                        double Vx = saVx / numberOfFluidsNodes;
-                        double Vy = saVy / numberOfFluidsNodes;
-                        double Vz = saVz / numberOfFluidsNodes;
+                        real Vx = saVx / numberOfFluidsNodes;
+                        real Vy = saVy / numberOfFluidsNodes;
+                        real Vz = saVz / numberOfFluidsNodes;
                         ostr << ";" << Vx << ";" << Vy << ";" << Vz;
                     }
                     // fluctuations
                     if ((options & Fluctuations) == Fluctuations) {
-                        double Vxx = saVxx / numberOfFluidsNodes;
-                        double Vyy = saVyy / numberOfFluidsNodes;
-                        double Vzz = saVzz / numberOfFluidsNodes;
-                        double Vxy = saVxy / numberOfFluidsNodes;
-                        double Vxz = saVxz / numberOfFluidsNodes;
-                        double Vyz = saVyz / numberOfFluidsNodes;
+                        real Vxx = saVxx / numberOfFluidsNodes;
+                        real Vyy = saVyy / numberOfFluidsNodes;
+                        real Vzz = saVzz / numberOfFluidsNodes;
+                        real Vxy = saVxy / numberOfFluidsNodes;
+                        real Vxz = saVxz / numberOfFluidsNodes;
+                        real Vyz = saVyz / numberOfFluidsNodes;
                         ostr << ";" << Vxx << ";" << Vyy << ";" << Vzz << ";" << Vxy << ";" << Vxz << ";" << Vyz;
                     }
                     // triple-correlations
                     if ((options & Triplecorrelations) == Triplecorrelations) {
-                        double Vxxx = saVxxx / numberOfFluidsNodes;
-                        double Vxxy = saVxxy / numberOfFluidsNodes;
-                        double Vxxz = saVxxz / numberOfFluidsNodes;
-                        double Vyyy = saVyyy / numberOfFluidsNodes;
-                        double Vyyx = saVyyx / numberOfFluidsNodes;
-                        double Vyyz = saVyyz / numberOfFluidsNodes;
-                        double Vzzz = saVzzz / numberOfFluidsNodes;
-                        double Vzzx = saVzzx / numberOfFluidsNodes;
-                        double Vzzy = saVzzy / numberOfFluidsNodes;
-                        double Vxyz = saVxyz / numberOfFluidsNodes;
+                        real Vxxx = saVxxx / numberOfFluidsNodes;
+                        real Vxxy = saVxxy / numberOfFluidsNodes;
+                        real Vxxz = saVxxz / numberOfFluidsNodes;
+                        real Vyyy = saVyyy / numberOfFluidsNodes;
+                        real Vyyx = saVyyx / numberOfFluidsNodes;
+                        real Vyyz = saVyyz / numberOfFluidsNodes;
+                        real Vzzz = saVzzz / numberOfFluidsNodes;
+                        real Vzzx = saVzzx / numberOfFluidsNodes;
+                        real Vzzy = saVzzy / numberOfFluidsNodes;
+                        real Vxyz = saVxyz / numberOfFluidsNodes;
                         ostr << ";" << Vxxx << ";" << Vxxy << ";" << Vxxz << ";" << Vyyy << ";" << Vyyx << ";" << Vyyz
                              << ";" << Vzzz << ";" << Vzzx << ";" << Vzzy << ";" << Vxyz;
                     }
@@ -766,30 +766,30 @@ void TimeAveragedValuesCoProcessor::calculateAverageValuesForPlane(
     saRho  = 0;
     saRhoF = 0;
 
-    double lsaVx = 0;
-    double lsaVy = 0;
-    double lsaVz = 0;
-
-    double lsaVxx = 0;
-    double lsaVyy = 0;
-    double lsaVzz = 0;
-    double lsaVxy = 0;
-    double lsaVxz = 0;
-    double lsaVyz = 0;
-
-    double lsaVxxx = 0;
-    double lsaVxxy = 0;
-    double lsaVxxz = 0;
-    double lsaVyyy = 0;
-    double lsaVyyx = 0;
-    double lsaVyyz = 0;
-    double lsaVzzz = 0;
-    double lsaVzzx = 0;
-    double lsaVzzy = 0;
-    double lsaVxyz = 0;
-
-    double lsaRho  = 0;
-    double lsaRhoF = 0;
+    real lsaVx = 0;
+    real lsaVy = 0;
+    real lsaVz = 0;
+
+    real lsaVxx = 0;
+    real lsaVyy = 0;
+    real lsaVzz = 0;
+    real lsaVxy = 0;
+    real lsaVxz = 0;
+    real lsaVyz = 0;
+
+    real lsaVxxx = 0;
+    real lsaVxxy = 0;
+    real lsaVxxz = 0;
+    real lsaVyyy = 0;
+    real lsaVyyx = 0;
+    real lsaVyyz = 0;
+    real lsaVzzz = 0;
+    real lsaVzzx = 0;
+    real lsaVzzy = 0;
+    real lsaVxyz = 0;
+
+    real lsaRho  = 0;
+    real lsaRhoF = 0;
 
     for (IntegrateValuesHelper::CalcNodes cn : cnodes) {
         SPtr<ILBMKernel> kernel                               = cn.block->getKernel();
@@ -799,30 +799,30 @@ void TimeAveragedValuesCoProcessor::calculateAverageValuesForPlane(
         SPtr<AverageValuesArray3D> averagedTriplecorrelations = kernel->getDataSet()->getAverageTriplecorrelations();
 
         for (UbTupleInt3 node : cn.nodes) {
-            double aRho  = (*averagedDensity)(Rho, val<1>(node), val<2>(node), val<3>(node));
-            double aRhoF = (*averagedDensity)(RhoF, val<1>(node), val<2>(node), val<3>(node));
-
-            double aVx = (*averagedVelocity)(Vx, val<1>(node), val<2>(node), val<3>(node));
-            double aVy = (*averagedVelocity)(Vy, val<1>(node), val<2>(node), val<3>(node));
-            double aVz = (*averagedVelocity)(Vz, val<1>(node), val<2>(node), val<3>(node));
-
-            double aVxx = (*averagedFluctuations)(Vxx, val<1>(node), val<2>(node), val<3>(node));
-            double aVyy = (*averagedFluctuations)(Vyy, val<1>(node), val<2>(node), val<3>(node));
-            double aVzz = (*averagedFluctuations)(Vzz, val<1>(node), val<2>(node), val<3>(node));
-            double aVxy = (*averagedFluctuations)(Vxy, val<1>(node), val<2>(node), val<3>(node));
-            double aVxz = (*averagedFluctuations)(Vxz, val<1>(node), val<2>(node), val<3>(node));
-            double aVyz = (*averagedFluctuations)(Vyz, val<1>(node), val<2>(node), val<3>(node));
-
-            double aVxxx = (*averagedTriplecorrelations)(Vxxx, val<1>(node), val<2>(node), val<3>(node));
-            double aVxxy = (*averagedTriplecorrelations)(Vxxy, val<1>(node), val<2>(node), val<3>(node));
-            double aVxxz = (*averagedTriplecorrelations)(Vxxz, val<1>(node), val<2>(node), val<3>(node));
-            double aVyyy = (*averagedTriplecorrelations)(Vyyy, val<1>(node), val<2>(node), val<3>(node));
-            double aVyyx = (*averagedTriplecorrelations)(Vyyx, val<1>(node), val<2>(node), val<3>(node));
-            double aVyyz = (*averagedTriplecorrelations)(Vyyz, val<1>(node), val<2>(node), val<3>(node));
-            double aVzzz = (*averagedTriplecorrelations)(Vzzz, val<1>(node), val<2>(node), val<3>(node));
-            double aVzzx = (*averagedTriplecorrelations)(Vzzx, val<1>(node), val<2>(node), val<3>(node));
-            double aVzzy = (*averagedTriplecorrelations)(Vzzy, val<1>(node), val<2>(node), val<3>(node));
-            double aVxyz = (*averagedTriplecorrelations)(Vxyz, val<1>(node), val<2>(node), val<3>(node));
+            real aRho  = (*averagedDensity)(Rho, val<1>(node), val<2>(node), val<3>(node));
+            real aRhoF = (*averagedDensity)(RhoF, val<1>(node), val<2>(node), val<3>(node));
+
+            real aVx = (*averagedVelocity)(Vx, val<1>(node), val<2>(node), val<3>(node));
+            real aVy = (*averagedVelocity)(Vy, val<1>(node), val<2>(node), val<3>(node));
+            real aVz = (*averagedVelocity)(Vz, val<1>(node), val<2>(node), val<3>(node));
+
+            real aVxx = (*averagedFluctuations)(Vxx, val<1>(node), val<2>(node), val<3>(node));
+            real aVyy = (*averagedFluctuations)(Vyy, val<1>(node), val<2>(node), val<3>(node));
+            real aVzz = (*averagedFluctuations)(Vzz, val<1>(node), val<2>(node), val<3>(node));
+            real aVxy = (*averagedFluctuations)(Vxy, val<1>(node), val<2>(node), val<3>(node));
+            real aVxz = (*averagedFluctuations)(Vxz, val<1>(node), val<2>(node), val<3>(node));
+            real aVyz = (*averagedFluctuations)(Vyz, val<1>(node), val<2>(node), val<3>(node));
+
+            real aVxxx = (*averagedTriplecorrelations)(Vxxx, val<1>(node), val<2>(node), val<3>(node));
+            real aVxxy = (*averagedTriplecorrelations)(Vxxy, val<1>(node), val<2>(node), val<3>(node));
+            real aVxxz = (*averagedTriplecorrelations)(Vxxz, val<1>(node), val<2>(node), val<3>(node));
+            real aVyyy = (*averagedTriplecorrelations)(Vyyy, val<1>(node), val<2>(node), val<3>(node));
+            real aVyyx = (*averagedTriplecorrelations)(Vyyx, val<1>(node), val<2>(node), val<3>(node));
+            real aVyyz = (*averagedTriplecorrelations)(Vyyz, val<1>(node), val<2>(node), val<3>(node));
+            real aVzzz = (*averagedTriplecorrelations)(Vzzz, val<1>(node), val<2>(node), val<3>(node));
+            real aVzzx = (*averagedTriplecorrelations)(Vzzx, val<1>(node), val<2>(node), val<3>(node));
+            real aVzzy = (*averagedTriplecorrelations)(Vzzy, val<1>(node), val<2>(node), val<3>(node));
+            real aVxyz = (*averagedTriplecorrelations)(Vxyz, val<1>(node), val<2>(node), val<3>(node));
 
             lsaRho += aRho;
             lsaRhoF += aRhoF;
@@ -850,8 +850,8 @@ void TimeAveragedValuesCoProcessor::calculateAverageValuesForPlane(
             lsaVxyz += aVxyz;
         }
     }
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    std::vector<real> values;
+    std::vector<real> rvalues;
 
     values.push_back(lsaRho);
     values.push_back(lsaRhoF);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
index 155f293a08d0ef0726193a48c9a8fb8051bd3972..72a0f6fe16ef3805ff496ccff924b8ecf541bfef 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
@@ -44,29 +44,29 @@ public:
                                   SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options);
     TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
                                   SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options, std::vector<int> levels,
-                                  std::vector<double> &levelCoords, std::vector<double> &bounds,
+                                  std::vector<real> &levelCoords, std::vector<real> &bounds,
                                   bool timeAveraging = true);
     //! Make update
-    void process(double step) override;
+    void process(real step) override;
     //! Computes subtotal of velocity , fluctuations and triple correlations
-    void calculateSubtotal(double step);
-    void addLevelCoordinate(double c);
+    void calculateSubtotal(real step);
+    void addLevelCoordinate(real c);
     void reset();
     void setWithGhostLayer(bool val);
     bool getWithGhostLayer();
 
 protected:
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! prepare data
     void addData(const SPtr<Block3D> block);
     void clearData();
     //! Computes average values of velocity , fluctuations and triple correlations
-    void calculateAverageValues(double timeStep);
+    void calculateAverageValues(real timeStep);
 
     void init();
     void initData();
-    void planarAverage(double step);
+    void planarAverage(real step);
     void calculateAverageValuesForPlane(std::vector<IntegrateValuesHelper::CalcNodes> &cnodes);
 
 private:
@@ -74,7 +74,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     bool root;
     int minInitLevel; // min init level
@@ -82,7 +82,7 @@ private:
     int gridRank;
     int resetStepRMS;
     int resetStepMeans;
-    double averageInterval;
+    real averageInterval;
     std::string path;
     WbWriter *writer;
     bool restart, compressible;
@@ -96,29 +96,29 @@ private:
     enum Fluctuations { Vxx, Vyy, Vzz, Vxy, Vxz, Vyz };
     enum Triplecorrelations { Vxxx, Vxxy, Vxxz, Vyyy, Vyyx, Vyyz, Vzzz, Vzzx, Vzzy, Vxyz };
 
-    double saRho, saRhoF;
-    double saVx, saVy, saVz;
-    double saVxx, saVyy, saVzz, saVxy, saVxz, saVyz;
-    double saVxxx, saVxxy, saVxxz, saVyyy, saVyyx, saVyyz, saVzzz, saVzzx, saVzzy, saVxyz;
+    real saRho, saRhoF;
+    real saVx, saVy, saVz;
+    real saVxx, saVyy, saVzz, saVxy, saVxz, saVyz;
+    real saVxxx, saVxxy, saVxxz, saVyyy, saVyyx, saVyyz, saVzzz, saVzzx, saVzzy, saVxyz;
 
     int options;
-    double numberOfSteps;
-    double minStep;
-    double maxStep;
+    real numberOfSteps;
+    real minStep;
+    real maxStep;
 
     int iMinX1, iMinX2, iMinX3;
     // int iMaxX1, iMaxX2, iMaxX3;
     int iMinC;
     int iMaxC;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 
     bool planarAveraging;
     bool timeAveraging;
-    std::vector<double> levelCoords;
+    std::vector<real> levelCoords;
     std::vector<int> levels;
-    std::vector<double> bounds;
+    std::vector<real> bounds;
 
     bool withGhostLayer;
 };
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp
index 1540ef953b583be39dd7e5e4c1ee3dc678f0d12f..22158269d3d622fd4dd0f6c943b7e28b871b5172 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp
@@ -8,7 +8,7 @@ TimeDependentBCCoProcessor::TimeDependentBCCoProcessor(SPtr<Grid3D> grid, SPtr<U
 //////////////////////////////////////////////////////////////////////////
 TimeDependentBCCoProcessor::~TimeDependentBCCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void TimeDependentBCCoProcessor::process(double step)
+void TimeDependentBCCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) {
         for (SPtr<Interactor3D> inter : interactors)
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h
index 0483c9f826c7d1b110bd8fb07016dc5b997bad40..cb41ca9622759f99d701d1d97865d9754d9bf5e6 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h
@@ -18,7 +18,7 @@ public:
     TimeDependentBCCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s);
     ~TimeDependentBCCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
     //! add interactors to CoProcessor
     void addInteractor(SPtr<Interactor3D> interactor);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
index d2be7f0e25ae773be89dcad02dc6b96c0651d23a..b897d4df17ceb61d88f242c17de3e2030d01e120 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
@@ -43,13 +43,13 @@ TimeseriesCoProcessor::TimeseriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler
 //////////////////////////////////////////////////////////////////////////
 TimeseriesCoProcessor::~TimeseriesCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void TimeseriesCoProcessor::process(double step)
+void TimeseriesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeseriesCoProcessor::collectData(double step)
+void TimeseriesCoProcessor::collectData(real step)
 {
     h1->calculateMQ();
 
@@ -58,13 +58,13 @@ void TimeseriesCoProcessor::collectData(double step)
     if (comm->getProcessID() == comm->getRoot()) {
         int istep = static_cast<int>(step);
         std::ofstream ostr;
-        double cellsVolume = h1->getCellsVolume();
+        real cellsVolume = h1->getCellsVolume();
 
-        double rho    = (h1->getRho()) / cellsVolume;
-        double vx     = (h1->getVx1()) / cellsVolume;
-        double vy     = (h1->getVx2()) / cellsVolume;
-        double vz     = (h1->getVx3()) / cellsVolume;
-        double volume = cellsVolume;
+        real rho    = (h1->getRho()) / cellsVolume;
+        real vx     = (h1->getVx1()) / cellsVolume;
+        real vy     = (h1->getVx2()) / cellsVolume;
+        real vz     = (h1->getVx3()) / cellsVolume;
+        real volume = cellsVolume;
 
         ostr.open(fname.c_str(), std::ios_base::out | std::ios_base::app);
         if (!ostr) {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
index e92e324aab1b7cbbe16d7e6652ecb3ed0dfa9ed4..7cdc98fd02b2776e970e52ccacf9966a4411a309 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
@@ -31,10 +31,10 @@ public:
     ~TimeseriesCoProcessor() override;
 
     //! calls collectData.
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
 
     //! object that can compute spacial average values in 3D-subdomain.
     SPtr<IntegrateValuesHelper> h1;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
index 6a06a20d41fc8b57c43dd219623bb2d544d7a4a9..4714349a9c25ec2b5d427e3b64ad00be738915f6 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
@@ -40,7 +40,7 @@ void TurbulenceIntensityCoProcessor::init()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TurbulenceIntensityCoProcessor::process(double step)
+void TurbulenceIntensityCoProcessor::process(real step)
 {
     calculateAverageValues(int(step));
 
@@ -50,7 +50,7 @@ void TurbulenceIntensityCoProcessor::process(double step)
     UBLOG(logDEBUG3, "TurbulenceIntensityCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void TurbulenceIntensityCoProcessor::collectData(double step)
+void TurbulenceIntensityCoProcessor::collectData(real step)
 {
     int istep = int(step);
 
@@ -100,7 +100,7 @@ void TurbulenceIntensityCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //   UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -138,12 +138,12 @@ void TurbulenceIntensityCoProcessor::addData(const SPtr<Block3D> block)
                                                 float(val<3>(org) - val<3>(nodeOffset) + ix3 * dx)));
 
                     // compute turbulence intensity
-                    double temp =
+                    real temp =
                         (*av)(ix1, ix2, ix3, AvVxxyyzz) / ((*av)(ix1, ix2, ix3, AvVx) * (*av)(ix1, ix2, ix3, AvVx) +
                                                            (*av)(ix1, ix2, ix3, AvVy) * (*av)(ix1, ix2, ix3, AvVy) +
                                                            (*av)(ix1, ix2, ix3, AvVz) * (*av)(ix1, ix2, ix3, AvVz));
 
-                    LBMReal ti = sqrt(temp);
+                    real ti = sqrt(temp);
 
                     if (UbMath::isNaN(ti))
                         UB_THROW(
@@ -179,14 +179,15 @@ void TurbulenceIntensityCoProcessor::addData(const SPtr<Block3D> block)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TurbulenceIntensityCoProcessor::calculateAverageValues(double timeStep)
+void TurbulenceIntensityCoProcessor::calculateAverageValues(real timeStep)
 {
+    using namespace vf::lbm::dir;
     using namespace D3Q27System;
 
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    LBMReal f[27];
-    LBMReal vx, vy, vz;
+    real f[27];
+    real vx, vy, vz;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         for (SPtr<Block3D> block : blockVector[level]) {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
index 40983604d25385420cba8da4af28faa33283aaf0..8f11e94b446050d2069f89dd8971fb3acc8fb787 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
@@ -19,20 +19,20 @@ class TurbulenceIntensityCoProcessor : public CoProcessor
 public:
     TurbulenceIntensityCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
                                    SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm);
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addData(const SPtr<Block3D> block);
     void clearData();
-    void calculateAverageValues(double timeStep);
+    void calculateAverageValues(real timeStep);
 
 private:
     void init();
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     int minInitLevel;
     int maxInitLevel;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
index 523ae4ffb0ac78143e071054d8804f67bd2e12d6..de781d96b582f83e38e74ca0643a0d0c23b170c0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
@@ -49,13 +49,13 @@ WriteBlocksCoProcessor::WriteBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbSchedul
 //////////////////////////////////////////////////////////////////////////
 WriteBlocksCoProcessor::~WriteBlocksCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void WriteBlocksCoProcessor::process(double step)
+void WriteBlocksCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteBlocksCoProcessor::collectData(double step)
+void WriteBlocksCoProcessor::collectData(real step)
 {
     if (comm->getProcessID() == comm->getRoot()) {
         int istep = int(step);
@@ -76,7 +76,7 @@ void WriteBlocksCoProcessor::collectData(double step)
         celldatanames.push_back("bundle");
 #endif
 
-        std::vector<std::vector<double>> celldata(celldatanames.size());
+        std::vector<std::vector<real>> celldata(celldatanames.size());
 
         int nr           = 0;
         int minInitLevel = this->grid->getCoarsestInitializedLevel();
@@ -109,12 +109,12 @@ void WriteBlocksCoProcessor::collectData(double step)
                 nr += 8;
 
                 // data
-                celldata[0].push_back((double)block->isActive());
-                celldata[1].push_back((double)block->getRank());
-                celldata[2].push_back((double)block->hasInterpolationFlag());
-                celldata[3].push_back((double)block->getGlobalID());
-                celldata[4].push_back((double)block->getPart());
-                celldata[5].push_back((double)block->getLevel());
+                celldata[0].push_back((real)block->isActive());
+                celldata[1].push_back((real)block->getRank());
+                celldata[2].push_back((real)block->hasInterpolationFlag());
+                celldata[3].push_back((real)block->getGlobalID());
+                celldata[4].push_back((real)block->getPart());
+                celldata[5].push_back((real)block->getLevel());
 
                 // bool flag = false;
                 // std::vector<SPtr<Block3DConnector>> connectors;
@@ -164,7 +164,7 @@ void WriteBlocksCoProcessor::collectData(double step)
                 //}
 
 #ifdef VF_FETOL
-                celldata[6].push_back((double)block->getBundle());
+                celldata[6].push_back((real)block->getBundle());
 #endif
             }
         }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
index 837d9bbad7533d0f097c07851b352c50cccf5465..c94cd1e64861cead5d01becbd80e5b3381e6e159 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
@@ -60,12 +60,12 @@ public:
                            std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteBlocksCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
 
     std::string path;
     WbWriter *writer;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
index 6c927f4945f9bcf211c7f84e38fbc6d395960b7f..4ed68397a7813314450ddd7dea33ca1824f54ac9 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
@@ -69,7 +69,7 @@ WriteBoundaryConditionsCoProcessor::WriteBoundaryConditionsCoProcessor(SPtr<Grid
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteBoundaryConditionsCoProcessor::process(double step)
+void WriteBoundaryConditionsCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -77,7 +77,7 @@ void WriteBoundaryConditionsCoProcessor::process(double step)
     UBLOG(logDEBUG3, "WriteBoundaryConditionsCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteBoundaryConditionsCoProcessor::collectData(double step)
+void WriteBoundaryConditionsCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -134,9 +134,9 @@ void WriteBoundaryConditionsCoProcessor::addDataGeo(SPtr<Block3D> block)
 {
     UbTupleDouble3 org        = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
-    double level = (double)block->getLevel();
+    real level = (real)block->getLevel();
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
index 2608a3ae8df931a5f0b347b77ad525712676aeab..31f2a5c8e31820217d76745d371a8a812acb3b67 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
@@ -64,12 +64,12 @@ public:
                                        WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteBoundaryConditionsCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     void addDataGeo(SPtr<Block3D> block);
     void clearData();
 
@@ -77,7 +77,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
index 1935ea22396a43dad53b2cf0a5b2960319026656..b47f1056172c07855eda232bede05eef475c4718 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
@@ -14,7 +14,7 @@ WriteGbObjectsCoProcessor::WriteGbObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
 //////////////////////////////////////////////////////////////////////////
 WriteGbObjectsCoProcessor::~WriteGbObjectsCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void WriteGbObjectsCoProcessor::process(double step)
+void WriteGbObjectsCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -22,7 +22,7 @@ void WriteGbObjectsCoProcessor::process(double step)
 //////////////////////////////////////////////////////////////////////////
 void WriteGbObjectsCoProcessor::addGbObject(SPtr<GbObject3D> object) { objects.push_back(object); }
 //////////////////////////////////////////////////////////////////////////
-void WriteGbObjectsCoProcessor::collectData(double step)
+void WriteGbObjectsCoProcessor::collectData(real step)
 {
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleInt3> triangles;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
index 09b9bdeb766d5c4251c18a46df888fe67ef54df8..5b502044f0556d2519afc23b72ea2b50bd664832 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
@@ -24,12 +24,12 @@ public:
                               std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteGbObjectsCoProcessor() override;
     //! calls collectData.
-    void process(double step) override;
+    void process(real step) override;
     //! adds geometry object
     void addGbObject(SPtr<GbObject3D> object);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
 
 private:
     std::vector<SPtr<GbObject3D>> objects;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
index fc70b841ff2bee64176ec711dc579649c0f1c032..2b49861af9fd53cdec491527f40f96b4f8bc0484 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
@@ -35,7 +35,7 @@ WriteMQFromSelectionCoProcessor::WriteMQFromSelectionCoProcessor(SPtr<Grid3D> gr
 //////////////////////////////////////////////////////////////////////////
 void WriteMQFromSelectionCoProcessor::init() {}
 //////////////////////////////////////////////////////////////////////////
-void WriteMQFromSelectionCoProcessor::process(double step)
+void WriteMQFromSelectionCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -43,7 +43,7 @@ void WriteMQFromSelectionCoProcessor::process(double step)
     UBLOG(logDEBUG3, "WriteMQFromSelectionCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteMQFromSelectionCoProcessor::collectData(double step)
+void WriteMQFromSelectionCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -53,12 +53,12 @@ void WriteMQFromSelectionCoProcessor::collectData(double step)
                 UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
                 UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
 
-                double minX1 = val<1>(org);
-                double minX2 = val<2>(org);
-                double minX3 = val<3>(org);
-                double maxX1 = val<1>(org) + val<1>(blockLengths);
-                double maxX2 = val<2>(org) + val<2>(blockLengths);
-                double maxX3 = val<3>(org) + val<3>(blockLengths);
+                real minX1 = val<1>(org);
+                real minX2 = val<2>(org);
+                real minX3 = val<3>(org);
+                real maxX1 = val<1>(org) + val<1>(blockLengths);
+                real maxX2 = val<2>(org) + val<2>(blockLengths);
+                real maxX3 = val<3>(org) + val<3>(blockLengths);
 
                 if (gbObject->isCellInsideOrCuttingGbObject3D(minX1, minX2, minX3, maxX1, maxX2, maxX3)) {
                     addDataMQ(block);
@@ -110,7 +110,7 @@ void WriteMQFromSelectionCoProcessor::clearData()
 //////////////////////////////////////////////////////////////////////////
 void WriteMQFromSelectionCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
-    double level = (double)block->getLevel();
+    real level = (real)block->getLevel();
     //   double blockID = (double)block->getGlobalID();
 
     // Diese Daten werden geschrieben:
@@ -128,8 +128,8 @@ void WriteMQFromSelectionCoProcessor::addDataMQ(SPtr<Block3D> block)
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     if (block->getKernel()->getCompressible()) {
         calcMacros = &D3Q27System::calcCompMacroscopicValues;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
index 0dc3976b14b9930a1c1713074ff2222ad52b1fc8..64e7572797a613815d62eec91a61d68120ee1a2e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
@@ -27,10 +27,10 @@ public:
                                     std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMQFromSelectionCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addDataMQ(SPtr<Block3D> block);
     void clearData();
 
@@ -38,7 +38,7 @@ private:
     void init();
     std::vector<UbTupleFloat3> nodes;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -50,7 +50,7 @@ private:
     std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<GbObject3D> gbObject;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
index b886271b7f3842b0453a0fa16f53ca75442da9e9..58ed15604f8937b0b33fca96dab69250a404242c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
@@ -72,7 +72,7 @@ void WriteMacroscopicQuantitiesCoProcessor::init()
 {}
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesCoProcessor::process(double step)
+void WriteMacroscopicQuantitiesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -81,7 +81,7 @@ void WriteMacroscopicQuantitiesCoProcessor::process(double step)
 }
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesCoProcessor::collectData(double step)
+void WriteMacroscopicQuantitiesCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -143,7 +143,7 @@ void WriteMacroscopicQuantitiesCoProcessor::clearData()
 //////////////////////////////////////////////////////////////////////////
 void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
-    double level   = (double)block->getLevel();
+    real level   = (real)block->getLevel();
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -162,8 +162,8 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
@@ -206,8 +206,8 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
                     int index                  = 0;
                     nodeNumbers(ix1, ix2, ix3) = nr++;
                     Vector3D worldCoordinates  = grid->getNodeCoordinates(block, ix1, ix2, ix3);
-                    nodes.push_back(UbTupleFloat3(float(worldCoordinates[0]), float(worldCoordinates[1]),
-                                                  float(worldCoordinates[2])));
+                    nodes.push_back(UbTupleFloat3(float(worldCoordinates[0]), float(worldCoordinates[1]),
+                                                  float(worldCoordinates[2]))); // node tuple is float-valued: narrow explicitly, independent of 'real'
 
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
index 7fb1844e08cf7454294b658f539b95c38eb3fa34..1815d480f392fa47cdbf64038791929dc32a2ff3 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
@@ -66,12 +66,12 @@ public:
                                           WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMacroscopicQuantitiesCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     //! Collect data for VTK-file
     //! \param block is a time step
     void addDataMQ(SPtr<Block3D> block);
@@ -82,7 +82,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -92,7 +92,7 @@ private:
     int gridRank;
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp
index c71a12ee6d7bc17297cca377712b9a6b479bb336..5c4f80887349280856a2de3791d9d0fb9012f53b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp
@@ -72,7 +72,7 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::init()
 {}
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesPlusMassCoProcessor::process(double step)
+void WriteMacroscopicQuantitiesPlusMassCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -81,7 +81,7 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::process(double step)
 }
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesPlusMassCoProcessor::collectData(double step)
+void WriteMacroscopicQuantitiesPlusMassCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -143,7 +143,7 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::clearData()
 //////////////////////////////////////////////////////////////////////////
 void WriteMacroscopicQuantitiesPlusMassCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
-    double level   = (double)block->getLevel();
+    real level   = (real)block->getLevel();
 
-    // Diese Daten werden geschrieben:
+    // These data are written:
     datanames.resize(0);
@@ -162,8 +162,8 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::addDataMQ(SPtr<Block3D> bloc
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
-    // knotennummerierung faengt immer bei 0 an!
+    // node numbering always starts at 0!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h
index 07f1a12676902e5964fcb0127203c1b527c89778..9251bfc22549a7a366e57540ea8387e851d4756f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h
@@ -66,12 +66,12 @@ public:
                                           WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMacroscopicQuantitiesPlusMassCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     //! Collect data for VTK-file
-    //! \param block is a time step
+    //! \param block is the block whose data is collected
     void addDataMQ(SPtr<Block3D> block);
@@ -82,7 +82,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -92,7 +92,7 @@ private:
     int gridRank;
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
index 00fe2808923c7e7e152ff1a4be9ef630b99797d7..ffc32b9535e477bf2f8de080f8bf45545cb336dd 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
@@ -73,7 +73,7 @@ void WriteMultiphaseQuantitiesCoProcessor::init()
 {}
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMultiphaseQuantitiesCoProcessor::process(double step)
+void WriteMultiphaseQuantitiesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -83,7 +83,7 @@ void WriteMultiphaseQuantitiesCoProcessor::process(double step)
 }
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMultiphaseQuantitiesCoProcessor::collectData(double step)
+void WriteMultiphaseQuantitiesCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -147,7 +147,10 @@ void WriteMultiphaseQuantitiesCoProcessor::clearData()
 void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+    using namespace vf::lbm::dir;
+    using namespace vf::lbm::constant;
+
     SPtr<LBMKernel> kernel = dynamicPointerCast<LBMKernel>(block->getKernel());
     //double level   = (double)block->getLevel();
 
@@ -173,15 +176,15 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     SPtr<PressureFieldArray3D> pressure;
     if (kernel->getDataSet()->getPressureField()) pressure = kernel->getDataSet()->getPressureField();
 
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal phi[D3Q27System::ENDF + 1];
-    LBMReal phi2[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho, p1, beta, kappa;
-    LBMReal densityRatio = kernel->getDensityRatio();
+    real f[D3Q27System::ENDF + 1];
+    real phi[D3Q27System::ENDF + 1];
+    real phi2[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho, p1, beta, kappa;
+    real densityRatio = kernel->getDensityRatio();
 
     kernel->getMultiphaseModelParameters(beta, kappa);
-    LBMReal phiL = kernel->getPhiL();
-    LBMReal phiH = kernel->getPhiH();
+    real phiL = kernel->getPhiL();
+    real phiH = kernel->getPhiH();
 
-    // knotennummerierung faengt immer bei 0 an!
+    // node numbering always starts at 0!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
@@ -217,10 +220,10 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 
-    // nummern vergeben und node vector erstellen + daten sammeln
+    // assign numbers and build the node vector + collect data
     CbArray3D<int> nodeNumbers((int)maxX1, (int)maxX2, (int)maxX3, -1);
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+        new CbArray3D<real, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2(
+        new CbArray3D<real, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
 
     for (int ix3 = minX3; ix3 < maxX3; ix3++) {
         for (int ix2 = minX2; ix2 < maxX2; ix2++) {
@@ -261,10 +264,10 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     }
 
     int nr = (int)nodes.size();
-    LBMReal dX1_phi;
-    LBMReal dX2_phi;
-    LBMReal dX3_phi;
-    LBMReal mu;
+    real dX1_phi;
+    real dX2_phi;
+    real dX3_phi;
+    real mu;
 
     for (int ix3 = minX3; ix3 <= maxX3; ix3++) {
         for (int ix2 = minX2; ix2 <= maxX2; ix2++) {
@@ -353,12 +356,12 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
                     }
 
                     distributionsF->getDistribution(f, ix1, ix2, ix3);
-                    //LBMReal dU = (*divU)(ix1, ix2, ix3);
+                    //real dU = (*divU)(ix1, ix2, ix3);
 
-                    LBMReal rhoH = 1.0;
-                    LBMReal rhoL = 1.0 / densityRatio;
+                    real rhoH = 1.0;
+                    real rhoL = 1.0 / densityRatio;
                     // LBMReal rhoToPhi = (1.0 - 1.0/densityRatio);
-                    LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+                    real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
                     // rho = phi[ZERO] + (1.0 - phi[ZERO])*1.0/densityRatio;
                     rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
@@ -471,39 +474,41 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     }
 }
 
-LBMReal WriteMultiphaseQuantitiesCoProcessor::gradX1_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::gradX1_phi(const real *const &h)
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX1[k] * h[k];
     }
     return 3.0 * sum;
 }
-LBMReal WriteMultiphaseQuantitiesCoProcessor::gradX2_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::gradX2_phi(const real *const &h)
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX2[k] * h[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal WriteMultiphaseQuantitiesCoProcessor::gradX3_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::gradX3_phi(const real *const &h)
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX3[k] * h[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal WriteMultiphaseQuantitiesCoProcessor::nabla2_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::nabla2_phi(const real *const &h)
 {
+    using namespace vf::lbm::dir;
+
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * (h[k] - h[DIR_000]);
     }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
index 3825f9d4df3e744aec1605524c78f0028e4380fd..de09654bb01b8bd851df3afcea1fb3445386fff0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
@@ -66,12 +66,12 @@ public:
                                           WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMultiphaseQuantitiesCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     //! Collect data for VTK-file
-    //! \param block is a time step
+    //! \param block is the block whose data is collected
     void addDataMQ(SPtr<Block3D> block);
@@ -82,7 +82,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -92,12 +92,12 @@ private:
     int gridRank;
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    LBMReal gradX1_phi(const LBMReal *const &);
-    LBMReal gradX2_phi(const LBMReal *const &);
-    LBMReal gradX3_phi(const LBMReal *const &);
-    LBMReal nabla2_phi(const LBMReal *const &);
+    real gradX1_phi(const real *const &);
+    real gradX2_phi(const real *const &);
+    real gradX3_phi(const real *const &);
+    real nabla2_phi(const real *const &);
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
index 900c4bc95e85e57254121882e43e89fbb05b7201..4e764c36350bbe234f4f50851a85fc35e5336049 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
@@ -71,7 +71,7 @@ void WriteThixotropyQuantitiesCoProcessor::init()
 
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteThixotropyQuantitiesCoProcessor::process(double step)
+void WriteThixotropyQuantitiesCoProcessor::process(real step)
 {
 	if (scheduler->isDue(step))
 		collectData(step);
@@ -79,7 +79,7 @@ void WriteThixotropyQuantitiesCoProcessor::process(double step)
 	UBLOG(logDEBUG3, "WriteThixotropyQuantitiesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteThixotropyQuantitiesCoProcessor::collectData(double step)
+void WriteThixotropyQuantitiesCoProcessor::collectData(real step)
 {
 	int istep = static_cast<int>(step);
 	//ConcentrationSum = 0;
@@ -144,7 +144,7 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
 	UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);;
 	UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-	double         dx = grid->getDeltaX(block);
+	real         dx = grid->getDeltaX(block);
 
 	//double level = (double)block->getLevel();
 	//double blockID = (double)block->getGlobalID();
@@ -174,8 +174,8 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
    SPtr<DistributionArray3D> distributionsF = kernel->getDataSet()->getFdistributions(); 
 	//SPtr<DistributionArray3D> distributionsH = kernel->getDataSet()->getHdistributions();
 	//LBMReal collFactorF = staticPointerCast<ThixotropyExpLBMKernel>(kernel)->getCollisionFactorF();
-	LBMReal collFactor = kernel->getCollisionFactor();
-	LBMReal f[D3Q27System::ENDF + 1];
+	real collFactor = kernel->getCollisionFactor();
+	real f[D3Q27System::ENDF + 1];
 	//LBMReal h[D3Q27System::ENDF + 1];
 	//LBMReal viscosity=0; // lambda, gammaDot;
 	
@@ -243,12 +243,12 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 					//data[index++].push_back(collFactorF);
 
 					distributionsF->getDistribution(f, ix1, ix2, ix3);
-					LBMReal rho = D3Q27System::getDensity(f);
-					LBMReal shearRate = D3Q27System::getShearRate(f, collFactor);
+					real rho = D3Q27System::getDensity(f);
+					real shearRate = D3Q27System::getShearRate(f, collFactor);
 					//LBMReal omega = Rheology::getHerschelBulkleyCollFactor(collFactor, shearRate, rho);
 					//LBMReal omega = Rheology::getPowellEyringCollFactor(collFactor, shearRate, rho);
-					LBMReal omega = Rheology::getBinghamCollFactor(collFactor, shearRate, rho);
-					LBMReal viscosity = (omega == 0) ? 0 : UbMath::c1o3 * (UbMath::c1/omega-UbMath::c1o2);
+					real omega = Rheology::getBinghamCollFactor(collFactor, shearRate, rho);
+					real viscosity = (omega == 0) ? 0 : vf::lbm::constant::c1o3 * (vf::lbm::constant::c1o1/omega- vf::lbm::constant::c1o2);
 
 					
 					data[index++].push_back(viscosity);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
index d247c5c76bd5dc243041e53905e2189980875bd3..fbb2b6740c53b1d8a2ed9e37881fff98e4707a69 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
@@ -48,10 +48,10 @@ public:
 	WriteThixotropyQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
 	~WriteThixotropyQuantitiesCoProcessor() = default;
 
-   void process(double step) override;
+   void process(real step) override;
 
 protected:
-   void collectData(double step);
+   void collectData(real step);
    void addDataMQ(SPtr<Block3D> block);
    void clearData();
 
@@ -60,7 +60,7 @@ private:
    std::vector<UbTupleFloat3> nodes;
    std::vector<UbTupleUInt8> cells;
    std::vector<std::string> datanames;
-   std::vector<std::vector<double> > data; 
+   std::vector<std::vector<real> > data; 
    std::string path;
    WbWriter* writer;
    SPtr<LBMUnitConverter> conv;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h b/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h
index e5e86a31b35e7900059649c7322478b1db1b7284..89817f187e9b943655363e7a5cb707c01019965e 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h
@@ -75,7 +75,7 @@ public:
     // grid refinement
     virtual int getSendDir() const { return sendDir; }
 
-    // virtual double getSendRecieveTime() = 0;
+    // virtual real getSendRecieveTime() = 0;
 
     virtual void prepareForSendX1() = 0;
     virtual void prepareForSendX2() = 0;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h
index 00ba4c4e915530a4678b7271b8f78648f267cd96..c2ba596a7bdb20c24fa373064e09854c4f6d95eb 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h
@@ -97,7 +97,7 @@ public:
     bool isInterpolationConnectorCF() override { return true; }
     bool isInterpolationConnectorFC() override { return false; }
 
-    double getSendRecieveTime();
+    real getSendRecieveTime();
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
@@ -123,7 +123,7 @@ protected:
     InterpolationProcessorPtr iprocessor;
 
     void writeICellFtoData(vector_type &data, int &index, D3Q27ICell &icellF);
-    void writeNodeToVector(vector_type &data, int &index, LBMReal *inode);
+    void writeNodeToVector(vector_type &data, int &index, real *inode);
     void getLocalMinMax(const int &gMin, const int &gMax, const bool &even, int &lMin, int &lMax,
                         const bool &dataDistribution);
     void getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3);
@@ -135,7 +135,7 @@ protected:
     void distributeReceiveVector(SPtr<DistributionArray3D> fTo, const int &lMinX1, const int &lMinX2, const int &lMinX3,
                                  const int &lMaxX1, const int &lMaxX2, const int &lMaxX3, vector_type &data,
                                  int &index);
-    void readICellCfromData(vector_type &data, int &index, LBMReal *icellC);
+    void readICellCfromData(vector_type &data, int &index, real *icellC);
 
     void findCFnodes();
     void findCFnodes(SPtr<DistributionArray3D> fFrom, const int &lMinX1, const int &lMinX2, const int &lMinX3,
@@ -156,15 +156,17 @@ CoarseToFineVectorConnector<VectorTransmitter>::CoarseToFineVectorConnector(
       receiverEvenOddNW(receiverEvenOddNW), receiverOddEvenSE(receiverOddEvenSE), receiverOddOddNE(receiverOddOddNE),
       iprocessor(iprocessor)
 {
-    if (!(sendDir == D3Q27System::DIR_P00 || sendDir == D3Q27System::DIR_M00 || sendDir == D3Q27System::DIR_0P0 ||
-          sendDir == D3Q27System::DIR_0M0 || sendDir == D3Q27System::DIR_00P || sendDir == D3Q27System::DIR_00M ||
-          sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MM0 || sendDir == D3Q27System::DIR_PM0 ||
-          sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0M ||
-          sendDir == D3Q27System::DIR_P0M || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_0PP ||
-          sendDir == D3Q27System::DIR_0MM || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MP ||
-          sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_PMP ||
-          sendDir == D3Q27System::DIR_MMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-          sendDir == D3Q27System::DIR_PMM || sendDir == D3Q27System::DIR_MMM)) {
+    using namespace vf::lbm::dir;
+
+    if (!(sendDir == DIR_P00 || sendDir == DIR_M00 || sendDir == DIR_0P0 ||
+          sendDir == DIR_0M0 || sendDir == DIR_00P || sendDir == DIR_00M ||
+          sendDir == DIR_PP0 || sendDir == DIR_MM0 || sendDir == DIR_PM0 ||
+          sendDir == DIR_MP0 || sendDir == DIR_P0P || sendDir == DIR_M0M ||
+          sendDir == DIR_P0M || sendDir == DIR_M0P || sendDir == DIR_0PP ||
+          sendDir == DIR_0MM || sendDir == DIR_0PM || sendDir == DIR_0MP ||
+          sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_PMP ||
+          sendDir == DIR_MMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+          sendDir == DIR_PMM || sendDir == DIR_MMM)) {
         throw UbException(UB_EXARGS, "invalid constructor for this direction");
     }
 }
@@ -298,13 +300,14 @@ template <typename VectorTransmitter>
 void CoarseToFineVectorConnector<VectorTransmitter>::init()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     bMaxX1 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX1();
     bMaxX2 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX2();
     bMaxX3 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX3();
 
     int sendSize      = 0;
-    LBMReal initValue = -999.0;
+    real initValue = -999.0;
 
     int sendDataPerNode = 27 /*f*/;
     int iCellSize       = 8; // size of interpolation cell
@@ -356,28 +359,28 @@ void CoarseToFineVectorConnector<VectorTransmitter>::init()
     if (senderEvenEvenSW)
         senderEvenEvenSW->getData().resize(sendSize, initValue);
     else
-        senderEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (senderEvenOddNW)
         senderEvenOddNW->getData().resize(sendSize, initValue);
     else
-        senderEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (senderOddEvenSE)
         senderOddEvenSE->getData().resize(sendSize, initValue);
     else
-        senderOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (senderOddOddNE)
         senderOddOddNE->getData().resize(sendSize, initValue);
     else
-        senderOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
 
     if (!receiverEvenEvenSW)
-        receiverEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (!receiverEvenOddNW)
-        receiverEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (!receiverOddEvenSE)
-        receiverOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (!receiverOddOddNE)
-        receiverOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
 
     // findCFnodes();
 }
@@ -386,6 +389,7 @@ template <typename VectorTransmitter>
 void CoarseToFineVectorConnector<VectorTransmitter>::fillSendVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fFrom = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                       = (int)fFrom->getNX1();
@@ -828,7 +832,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::fillSendVectorExt(SPtr<Dist
     if (data.size() == 0)
         return;
     int ix1, ix2, ix3;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
     SPtr<BCArray3D> bcArray = block.lock()->getKernel()->getBCProcessor()->getBCArray();
 
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3++) {
@@ -878,7 +882,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::writeICellFtoData(vector_ty
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void CoarseToFineVectorConnector<VectorTransmitter>::writeNodeToVector(vector_type &data, int &index, LBMReal *inode)
+void CoarseToFineVectorConnector<VectorTransmitter>::writeNodeToVector(vector_type &data, int &index, real *inode)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         data[index++] = inode[i];
@@ -889,6 +893,7 @@ template <typename VectorTransmitter>
 void CoarseToFineVectorConnector<VectorTransmitter>::distributeReceiveVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fTo = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                     = (int)fTo->getNX1();
@@ -1606,7 +1611,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::distributeReceiveVector(SPt
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3++) {
         for (ix2 = lMinX2; ix2 < lMaxX2; ix2++) {
             for (ix1 = lMinX1; ix1 < lMaxX1; ix1++) {
-                LBMReal icellC[27];
+                real icellC[27];
                 this->readICellCfromData(data, index, icellC);
                 iprocessor->writeINodeInv(fTo, icellC, ix1, ix2, ix3);
             }
@@ -1615,7 +1620,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::distributeReceiveVector(SPt
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void CoarseToFineVectorConnector<VectorTransmitter>::readICellCfromData(vector_type &data, int &index, LBMReal *icellC)
+void CoarseToFineVectorConnector<VectorTransmitter>::readICellCfromData(vector_type &data, int &index, real *icellC)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         icellC[i] = data[index++];
@@ -1627,6 +1632,8 @@ void CoarseToFineVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     int &maxX2, int &maxX3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1767,6 +1774,8 @@ void CoarseToFineVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     CFconnectorType /*connType*/)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1923,6 +1932,8 @@ void CoarseToFineVectorConnector<VectorTransmitter>::findCFnodes()
     int lMinX1, lMinX2, lMinX3, lMaxX1, lMaxX2, lMaxX3;
 
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     if (block.lock()->hasInterpolationFlagCF(DIR_M00)) {
         lMinX1 = 1;
         lMaxX1 = lMinX1 + 1;
@@ -1967,7 +1978,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::findCFnodes(SPtr<Distributi
     if (data.size() == 0)
         return;
     int ix1, ix2, ix3;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
     SPtr<BCArray3D> bcArray = block.lock()->getKernel()->getBCProcessor()->getBCArray();
 
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3++) {
@@ -2014,7 +2025,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::findCFnodes(SPtr<Distributi
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-double CoarseToFineVectorConnector<VectorTransmitter>::getSendRecieveTime()
+real CoarseToFineVectorConnector<VectorTransmitter>::getSendRecieveTime()
 {
     return 0;
 }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h
index 854402805621ea33484fc9c3a4a5d3e45b565719..f4f4080215869cdd4ae1c75e750daa2419af9240 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h
@@ -87,7 +87,7 @@ public:
     bool isInterpolationConnectorCF() override { return false; }
     bool isInterpolationConnectorFC() override { return true; }
 
-    double getSendRecieveTime();
+    real getSendRecieveTime();
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
@@ -115,8 +115,8 @@ protected:
 
     CFconnectorType connType;
 
-    void writeICellCtoData(vector_type &data, int &index, LBMReal *icellC);
-    void writeNodeToVector(vector_type &data, int &index, LBMReal *inode);
+    void writeICellCtoData(vector_type &data, int &index, real *icellC);
+    void writeNodeToVector(vector_type &data, int &index, real *inode);
     //void getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3);
     void getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3,
                         CFconnectorType connType);
@@ -128,7 +128,7 @@ protected:
                                  const int &lMaxX1, const int &lMaxX2, const int &lMaxX3, vector_type &data,
                                  int &index);
     void readICellFfromData(vector_type &data, int &index, D3Q27ICell &icellF);
-    void readNodeFromVector(vector_type &data, int &index, LBMReal *inode);
+    void readNodeFromVector(vector_type &data, int &index, real *inode);
     void getLocalOffsets(const int &gMax, int &oMin);
     void getLocalMins(int &minX1, int &minX2, int &minX3, const int &oMinX1, const int &oMinX2, const int &oMinX3);
 
@@ -144,16 +144,18 @@ FineToCoarseVectorConnector<VectorTransmitter>::FineToCoarseVectorConnector(SPtr
     : Block3DConnector(sendDir), block(block), sender(sender), receiver(receiver), iprocessor(iprocessor),
       connType(connType)
 {
-    if (!(sendDir == D3Q27System::DIR_P00 || sendDir == D3Q27System::DIR_M00 || sendDir == D3Q27System::DIR_0P0 ||
-          sendDir == D3Q27System::DIR_0M0 || sendDir == D3Q27System::DIR_00P || sendDir == D3Q27System::DIR_00M ||
-          sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MM0 || sendDir == D3Q27System::DIR_PM0 ||
-          sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0M ||
-          sendDir == D3Q27System::DIR_P0M || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_0PP ||
-          sendDir == D3Q27System::DIR_0MM || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MP
+    using namespace vf::lbm::dir;
 
-          || sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_PMP ||
-          sendDir == D3Q27System::DIR_MMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-          sendDir == D3Q27System::DIR_PMM || sendDir == D3Q27System::DIR_MMM
+    if (!(sendDir == DIR_P00 || sendDir == DIR_M00 || sendDir == DIR_0P0 ||
+          sendDir == DIR_0M0 || sendDir == DIR_00P || sendDir == DIR_00M ||
+          sendDir == DIR_PP0 || sendDir == DIR_MM0 || sendDir == DIR_PM0 ||
+          sendDir == DIR_MP0 || sendDir == DIR_P0P || sendDir == DIR_M0M ||
+          sendDir == DIR_P0M || sendDir == DIR_M0P || sendDir == DIR_0PP ||
+          sendDir == DIR_0MM || sendDir == DIR_0PM || sendDir == DIR_0MP
+
+          || sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_PMP ||
+          sendDir == DIR_MMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+          sendDir == DIR_PMM || sendDir == DIR_MMM
 
           )) {
         throw UbException(UB_EXARGS, "invalid constructor for this direction");
@@ -224,13 +226,14 @@ template <typename VectorTransmitter>
 void FineToCoarseVectorConnector<VectorTransmitter>::init()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     bMaxX1 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX1();
     bMaxX2 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX2();
     bMaxX3 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX3();
 
     int sendSize      = 0;
-    LBMReal initValue = -999.0;
+    real initValue = -999.0;
 
     int sendDataPerNode = 27 /*f*/;
     int iCellSize       = 1; // size of interpolation cell
@@ -286,6 +289,7 @@ template <typename VectorTransmitter>
 void FineToCoarseVectorConnector<VectorTransmitter>::fillSendVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fFrom = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                       = (int)fFrom->getNX1();
@@ -790,13 +794,13 @@ void FineToCoarseVectorConnector<VectorTransmitter>::fillSendVector(SPtr<Distrib
                                                                     const int &lMaxX3, vector_type &data, int &index)
 {
     int ix1, ix2, ix3;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
     SPtr<BCArray3D> bcArray = block.lock()->getKernel()->getBCProcessor()->getBCArray();
 
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3 += 2) {
         for (ix2 = lMinX2; ix2 < lMaxX2; ix2 += 2) {
             for (ix1 = lMinX1; ix1 < lMaxX1; ix1 += 2) {
-                LBMReal icellC[27];
+                real icellC[27];
                 D3Q27ICell icellF;
 
                 int howManySolids = iprocessor->iCellHowManySolids(bcArray, ix1, ix2, ix3);
@@ -827,7 +831,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::fillSendVector(SPtr<Distrib
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void FineToCoarseVectorConnector<VectorTransmitter>::writeICellCtoData(vector_type &data, int &index, LBMReal *icellC)
+void FineToCoarseVectorConnector<VectorTransmitter>::writeICellCtoData(vector_type &data, int &index, real *icellC)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         data[index++] = icellC[i];
@@ -849,6 +853,7 @@ template <typename VectorTransmitter>
 void FineToCoarseVectorConnector<VectorTransmitter>::distributeReceiveVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fTo = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                     = (int)fTo->getNX1();
@@ -1145,7 +1150,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::readICellFfromData(vector_t
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void FineToCoarseVectorConnector<VectorTransmitter>::readNodeFromVector(vector_type &data, int &index, LBMReal *inode)
+void FineToCoarseVectorConnector<VectorTransmitter>::readNodeFromVector(vector_type &data, int &index, real *inode)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         inode[i] = data[index++];
@@ -1157,6 +1162,8 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     int &maxX2, int &maxX3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+    
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1298,6 +1305,8 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     CFconnectorType /*connType*/)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1447,6 +1456,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMins(int &minX1, in
                                                                   const int &oMinX2, const int &oMinX3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     switch (sendDir) {
         case DIR_P00:
@@ -1545,7 +1555,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMins(int &minX1, in
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-double FineToCoarseVectorConnector<VectorTransmitter>::getSendRecieveTime()
+real FineToCoarseVectorConnector<VectorTransmitter>::getSendRecieveTime()
 {
     return 0;
 }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnectorTest.cpp b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnectorTest.cpp
index 31a01276cab5b8f51fb2361b83e477b3759b5758..1d400f74e8e4a2def9419c4e5288c512db7af936 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnectorTest.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnectorTest.cpp
@@ -25,12 +25,14 @@ public:
 
 TEST_F(FineToCoarseVectorConnectorTest, getLocalMinMax)
 {
-    int sendDir = D3Q27System::DIR_P00;
+    using namespace vf::lbm::dir;
+
+    int sendDir = DIR_P00;
     block->setInterpolationFlagFC(sendDir);
     //FineToCoarseVectorConnector(SPtr<Block3D> block, VectorTransmitterPtr sender, VectorTransmitterPtr receiver,
                                 //int sendDir, InterpolationProcessorPtr iprocessor, CFconnectorType connType);
     InterpolationProcessorPtr iprocessor;
-    auto sut = FineToCoarseVectorConnector<TbTransmitter<CbVector<LBMReal>>>(block, senderFCevenEvenSW, receiverFCevenEvenSW, sendDir, iprocessor, EvenOddNW);
+    auto sut = FineToCoarseVectorConnector<TbTransmitter<CbVector<real>>>(block, senderFCevenEvenSW, receiverFCevenEvenSW, sendDir, iprocessor, EvenOddNW);
 
 
     //(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp
index 18a8319589cde954b7c2202e10f3eda61b435671..7a12ef1bb30796bd2aa24fd6c61c07f26295950d 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp
@@ -59,8 +59,10 @@ void FullDirectConnector::sendVectors()
 //////////////////////////////////////////////////////////////////////////
 void FullDirectConnector::exchangeData()
 {
+    using namespace vf::lbm::dir;
+
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 exchangeData(maxX1 - 1, x2, x3, 0, x2, x3);
@@ -68,7 +70,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 exchangeData(1, x2, x3, maxX1, x2, x3);
@@ -76,7 +78,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, maxX2 - 1, x3, x1, 0, x3);
@@ -84,7 +86,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, 1, x3, x1, maxX2, x3);
@@ -93,7 +95,7 @@ void FullDirectConnector::exchangeData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, x2, maxX3 - 1, x1, x2, 0);
@@ -101,7 +103,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, x2, 1, x1, x2, maxX3);
@@ -109,77 +111,77 @@ void FullDirectConnector::exchangeData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(maxX1 - 1, maxX2 - 1, x3, 0, 0, x3);
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(1, maxX2 - 1, x3, maxX1, 0, x3);
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(1, 1, x3, maxX1, maxX2, x3);
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(maxX1 - 1, 1, x3, 0, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(maxX1 - 1, x2, maxX3 - 1, 0, x2, 0);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(1, x2, 1, maxX1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(maxX1 - 1, x2, 1, 0, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(1, x2, maxX3 - 1, maxX1, x2, 0);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, maxX2 - 1, maxX3 - 1, x1, 0, 0);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, 1, 1, x1, maxX2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, maxX2 - 1, 1, x1, 0, maxX3);
         }
 
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, 1, maxX3 - 1, x1, maxX2, 0);
         }
 
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         exchangeData(1, 1, maxX3 - 1, maxX1, maxX2, 0);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         exchangeData(maxX1 - 1, 1, maxX3 - 1, 0, maxX2, 0);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         exchangeData(1, maxX2 - 1, maxX3 - 1, maxX1, 0, 0);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         exchangeData(maxX1 - 1, maxX2 - 1, maxX3 - 1, 0, 0, 0);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         exchangeData(1, 1, 1, maxX1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         exchangeData(maxX1 - 1, 1, 1, 0, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         exchangeData(1, maxX2 - 1, 1, maxX1, 0, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         exchangeData(maxX1 - 1, maxX2 - 1, 1, 0, 0, maxX3);
     } else
         UB_THROW(UbException(UB_EXARGS, "unknown dir"));
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp
index d5b810015abc1172699b7489df4ea0aee8b02fe2..d4cb17d156016815b20f8420a4699a428899af51 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp
@@ -59,11 +59,13 @@ void FullVectorConnector::fillSendVectors()
 ////////////////////////////////////////////////////////////////////////
 void FullVectorConnector::fillData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &sdata = sender->getData();
 
     int index = 0;
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 fillData(sdata, index, maxX1 - 1, x2, x3);
@@ -71,7 +73,7 @@ void FullVectorConnector::fillData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 fillData(sdata, index, 1, x2, x3);
@@ -79,7 +81,7 @@ void FullVectorConnector::fillData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, maxX2 - 1, x3);
@@ -87,7 +89,7 @@ void FullVectorConnector::fillData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, 1, x3);
@@ -95,7 +97,7 @@ void FullVectorConnector::fillData()
         }
     }
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, x2, maxX3 - 1);
@@ -103,7 +105,7 @@ void FullVectorConnector::fillData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, x2, 1);
@@ -111,24 +113,24 @@ void FullVectorConnector::fillData()
         }
     }
     // NE NW SW SE
-    else if (sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_MM0 ||
-             sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PP0 || sendDir == DIR_MP0 || sendDir == DIR_MM0 ||
+             sendDir == DIR_PM0) {
         int x1 = 0;
         int x2 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_PP0:
+            case DIR_PP0:
                 x1 = maxX1 - 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_MP0:
+            case DIR_MP0:
                 x1 = 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_MM0:
+            case DIR_MM0:
                 x1 = 1;
                 x2 = 1;
                 break;
-            case D3Q27System::DIR_PM0:
+            case DIR_PM0:
                 x1 = maxX1 - 1;
                 x2 = 1;
                 break;
@@ -138,24 +140,24 @@ void FullVectorConnector::fillData()
         }
     }
     // TE TW BW BE
-    else if (sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_M0M ||
-             sendDir == D3Q27System::DIR_P0M) {
+    else if (sendDir == DIR_P0P || sendDir == DIR_M0P || sendDir == DIR_M0M ||
+             sendDir == DIR_P0M) {
         int x1 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_P0P:
+            case DIR_P0P:
                 x1 = maxX1 - 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_M0P:
+            case DIR_M0P:
                 x1 = 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_M0M:
+            case DIR_M0M:
                 x1 = 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_P0M:
+            case DIR_P0M:
                 x1 = maxX1 - 1;
                 x3 = 1;
                 break;
@@ -165,24 +167,24 @@ void FullVectorConnector::fillData()
         }
     }
     // TN BN BS TS
-    else if (sendDir == D3Q27System::DIR_0PP || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MM ||
-             sendDir == D3Q27System::DIR_0MP) {
+    else if (sendDir == DIR_0PP || sendDir == DIR_0PM || sendDir == DIR_0MM ||
+             sendDir == DIR_0MP) {
         int x2 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_0PP:
+            case DIR_0PP:
                 x3 = maxX3 - 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_0PM:
+            case DIR_0PM:
                 x3 = 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_0MM:
+            case DIR_0MM:
                 x3 = 1;
                 x2 = 1;
                 break;
-            case D3Q27System::DIR_0MP:
+            case DIR_0MP:
                 x3 = maxX3 - 1;
                 x2 = 1;
                 break;
@@ -192,49 +194,49 @@ void FullVectorConnector::fillData()
         }
     }
     // TNE TNW TSW TSE BNE BNW BSW BSE
-    else if (sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_MMP ||
-             sendDir == D3Q27System::DIR_PMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-             sendDir == D3Q27System::DIR_MMM || sendDir == D3Q27System::DIR_PMM) {
+    else if (sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_MMP ||
+             sendDir == DIR_PMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+             sendDir == DIR_MMM || sendDir == DIR_PMM) {
         int x1 = 0;
         int x2 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_PPP:
+            case DIR_PPP:
                 x1 = maxX1 - 1;
                 x2 = maxX2 - 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_MPP:
+            case DIR_MPP:
                 x1 = 1;
                 x2 = maxX2 - 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_MMP:
+            case DIR_MMP:
                 x1 = 1;
                 x2 = 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_PMP:
+            case DIR_PMP:
                 x1 = maxX1 - 1;
                 x2 = 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_PPM:
+            case DIR_PPM:
                 x1 = maxX1 - 1;
                 x2 = maxX2 - 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_MPM:
+            case DIR_MPM:
                 x1 = 1;
                 x2 = maxX2 - 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_MMM:
+            case DIR_MMM:
                 x1 = 1;
                 x2 = 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_PMM:
+            case DIR_PMM:
                 x1 = maxX1 - 1;
                 x2 = 1;
                 x3 = 1;
@@ -253,41 +255,43 @@ void FullVectorConnector::distributeReceiveVectors()
 ////////////////////////////////////////////////////////////////////////
 void FullVectorConnector::distributeData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &rdata = receiver->getData();
 
     int index = 0;
 
-    if (sendDir == D3Q27System::DIR_M00) {
+    if (sendDir == DIR_M00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 distributeData(rdata, index, 0, x2, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_P00) {
+    } else if (sendDir == DIR_P00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 distributeData(rdata, index, maxX1, x2, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_0M0) {
+    } else if (sendDir == DIR_0M0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, 0, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_0P0) {
+    } else if (sendDir == DIR_0P0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, maxX2, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_00M) {
+    } else if (sendDir == DIR_00M) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, x2, 0);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_00P) {
+    } else if (sendDir == DIR_00P) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, x2, maxX3);
@@ -295,25 +299,25 @@ void FullVectorConnector::distributeData()
         }
     }
     // NE NW SW SE
-    else if (sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_MM0 ||
-             sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PP0 || sendDir == DIR_MP0 || sendDir == DIR_MM0 ||
+             sendDir == DIR_PM0) {
         int x1 = 0;
         int x2 = 0;
         switch (sendDir) // wenn sendir NE dann kommen werte von SW
         {
-            case D3Q27System::DIR_PP0:
+            case DIR_PP0:
                 x1 = maxX1;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_MP0:
+            case DIR_MP0:
                 x1 = 0;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_MM0:
+            case DIR_MM0:
                 x1 = 0;
                 x2 = 0;
                 break;
-            case D3Q27System::DIR_PM0:
+            case DIR_PM0:
                 x1 = maxX1;
                 x2 = 0;
                 break;
@@ -324,27 +328,27 @@ void FullVectorConnector::distributeData()
 
     }
     // TE TW BW BE
-    else if (sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_M0M ||
-             sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0P || sendDir == DIR_M0P || sendDir == DIR_M0M ||
+             sendDir == DIR_P0M)
 
     {
         int x1 = 0;
         int x3 = 0;
         switch (sendDir) // wenn sendir NE dann kommen werte von SW
         {
-            case D3Q27System::DIR_P0P:
+            case DIR_P0P:
                 x1 = maxX1;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_M0P:
+            case DIR_M0P:
                 x1 = 0;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_M0M:
+            case DIR_M0M:
                 x1 = 0;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_P0M:
+            case DIR_P0M:
                 x1 = maxX1;
                 x3 = 0;
                 break;
@@ -354,24 +358,24 @@ void FullVectorConnector::distributeData()
         }
     }
     // TN BN BS TS
-    else if (sendDir == D3Q27System::DIR_0PP || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MM ||
-             sendDir == D3Q27System::DIR_0MP) {
+    else if (sendDir == DIR_0PP || sendDir == DIR_0PM || sendDir == DIR_0MM ||
+             sendDir == DIR_0MP) {
         int x2 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_0PP:
+            case DIR_0PP:
                 x3 = maxX3;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_0PM:
+            case DIR_0PM:
                 x3 = 0;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_0MM:
+            case DIR_0MM:
                 x3 = 0;
                 x2 = 0;
                 break;
-            case D3Q27System::DIR_0MP:
+            case DIR_0MP:
                 x3 = maxX3;
                 x2 = 0;
                 break;
@@ -381,50 +385,50 @@ void FullVectorConnector::distributeData()
         }
     }
     // TNE TNW TSW TSE BNE BNW BSW BSE
-    else if (sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_MMP ||
-             sendDir == D3Q27System::DIR_PMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-             sendDir == D3Q27System::DIR_MMM || sendDir == D3Q27System::DIR_PMM) {
+    else if (sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_MMP ||
+             sendDir == DIR_PMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+             sendDir == DIR_MMM || sendDir == DIR_PMM) {
         int x1 = 0;
         int x2 = 0;
         int x3 = 0;
 
         switch (sendDir) {
-            case D3Q27System::DIR_PPP:
+            case DIR_PPP:
                 x1 = maxX1;
                 x2 = maxX2;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_MPP:
+            case DIR_MPP:
                 x1 = 0;
                 x2 = maxX2;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_MMP:
+            case DIR_MMP:
                 x1 = 0;
                 x2 = 0;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_PMP:
+            case DIR_PMP:
                 x1 = maxX1;
                 x2 = 0;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_PPM:
+            case DIR_PPM:
                 x1 = maxX1;
                 x2 = maxX2;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_MPM:
+            case DIR_MPM:
                 x1 = 0;
                 x2 = maxX2;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_MMM:
+            case DIR_MMM:
                 x1 = 0;
                 x2 = 0;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_PMM:
+            case DIR_PMM:
                 x1 = maxX1;
                 x2 = 0;
                 x3 = 0;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h b/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h
index 9e8819ebd645ade3b17b2cb1e3a3f2d2c7c67d0c..ba17218ba7ec390ace7b3b964a3b33977a97f52e 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h
@@ -37,6 +37,7 @@
 #include "Block3D.h"
 #include "Block3DConnector.h"
 #include "PointerDefinitions.h"
+#include "lbm/constants/D3Q27.h"
 
 //! A class provides an interface for connectors in shared memory
 class LocalBlock3DConnector : public Block3DConnector
@@ -63,7 +64,7 @@ public:
     bool isInterpolationConnectorCF() override { return false; }
     bool isInterpolationConnectorFC() override { return false; }
 
-    double getSendRecieveTime();
+    real getSendRecieveTime();
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
diff --git a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h
index 7344b0fe1272c7dac58e45d25b2d0011d65d637f..44d3f9fc251d12c9c621193d326cbc751921d957 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h
@@ -54,13 +54,13 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFrom;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFrom;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFrom;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFrom;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFrom;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFrom;
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTo;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTo;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsTo;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTo;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTo;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsTo;
 
     SPtr<EsoTwist3D> fFrom;
     SPtr<EsoTwist3D> fTo;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp
index 72f43858ae7e64538b4b9bdb7028a8c895e2e84d..1bdb92f6b0d51c3bfb8daf6e149be7c1be0fecf0 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp
@@ -12,57 +12,59 @@ OneDistributionFullVectorConnector::OneDistributionFullVectorConnector(SPtr<Bloc
 //////////////////////////////////////////////////////////////////////////
 void OneDistributionFullVectorConnector::init()
 {
+    using namespace vf::lbm::dir;
+
     FullVectorConnector::init();
     
     fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
 
     int anz = 27;
     switch (sendDir) {
-        case D3Q27System::DIR_000:
+        case DIR_000:
             UB_THROW(UbException(UB_EXARGS, "ZERO not allowed"));
             break;
-        case D3Q27System::DIR_P00:
-        case D3Q27System::DIR_M00:
+        case DIR_P00:
+        case DIR_M00:
             sender->getData().resize(maxX2 * maxX3 * anz, 0.0);
             break;
-        case D3Q27System::DIR_0P0:
-        case D3Q27System::DIR_0M0:
+        case DIR_0P0:
+        case DIR_0M0:
             sender->getData().resize(maxX1 * maxX3 * anz, 0.0);
             break;
-        case D3Q27System::DIR_00P:
-        case D3Q27System::DIR_00M:
+        case DIR_00P:
+        case DIR_00M:
             sender->getData().resize(maxX1 * maxX2 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_PP0:
-        case D3Q27System::DIR_MM0:
-        case D3Q27System::DIR_PM0:
-        case D3Q27System::DIR_MP0:
+        case DIR_PP0:
+        case DIR_MM0:
+        case DIR_PM0:
+        case DIR_MP0:
             sender->getData().resize(maxX3 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_P0P:
-        case D3Q27System::DIR_M0M:
-        case D3Q27System::DIR_P0M:
-        case D3Q27System::DIR_M0P:
+        case DIR_P0P:
+        case DIR_M0M:
+        case DIR_P0M:
+        case DIR_M0P:
             sender->getData().resize(maxX2 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_0PP:
-        case D3Q27System::DIR_0MM:
-        case D3Q27System::DIR_0PM:
-        case D3Q27System::DIR_0MP:
+        case DIR_0PP:
+        case DIR_0MM:
+        case DIR_0PM:
+        case DIR_0MP:
             sender->getData().resize(maxX1 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_PPP:
-        case D3Q27System::DIR_MMM:
-        case D3Q27System::DIR_PPM:
-        case D3Q27System::DIR_MMP:
-        case D3Q27System::DIR_PMP:
-        case D3Q27System::DIR_MPM:
-        case D3Q27System::DIR_PMM:
-        case D3Q27System::DIR_MPP:
+        case DIR_PPP:
+        case DIR_MMM:
+        case DIR_PPM:
+        case DIR_MMP:
+        case DIR_PMP:
+        case DIR_MPM:
+        case DIR_PMM:
+        case DIR_MPP:
             sender->getData().resize(anz, 0.0);
             break;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h
index 2342fe66f48e8b437540716a1264ecf286c7295e..a9a53455c934fa68663ac6f3ff0722cdb45f689b 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h
@@ -31,9 +31,9 @@ protected:
     inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
 
     SPtr<EsoTwist3D> fDis;
 };
diff --git a/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h b/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h
index 5151e61900c8b25bd6282987143c6935c6a66469..2c27eea33a01b7b680c645c4e143639f779bf4d6 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h
@@ -70,7 +70,7 @@ public:
     bool isInterpolationConnectorCF() override { return false; }
     bool isInterpolationConnectorFC() override { return false; }
 
-    double getSendRecieveTime() { return 0; }
+    real getSendRecieveTime() { return 0; }
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp
index 782b0f27d4b0cd006a27c89def02dad11ff558c5..4856743128041bbfef3048fb53c9d8110de498fb 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp
@@ -63,6 +63,8 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::sendVectors()
 //////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -94,7 +96,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     int maxX3m3 = maxX3 - 3;
 
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(maxX1m3, x2, x3, minX1, x2, x3);
@@ -103,7 +105,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(minX1p3, x2, x3, maxX1, x2, x3);
@@ -112,7 +114,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, maxX2m3, x3, x1, minX2, x3);
@@ -121,7 +123,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, minX2p3, x3, x1, maxX2, x3);
@@ -131,7 +133,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, maxX3m3, x1, x2, minX3);
@@ -140,7 +142,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, minX3p3, x1, x2, maxX3);
@@ -149,7 +151,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, maxX2m3, x3, minX1, minX2, x3);
             exchangeData(maxX1m2, maxX2m2, x3, minX1p1, minX2p1, x3);
@@ -158,7 +160,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, maxX2m3, x3, maxX1, minX2, x3);
             exchangeData(minX1p2, maxX2m2, x3, maxX1m1, minX2p1, x3);
@@ -167,7 +169,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, minX2p3, x3, maxX1, maxX2, x3);
             exchangeData(minX1p2, minX2p2, x3, maxX1m1, maxX2m1, x3);
@@ -176,70 +178,70 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, minX2p3, x3, minX1, maxX2, x3);
             exchangeData(maxX1m2, minX2p2, x3, minX1p1, maxX2m1, x3);
             exchangeData(maxX1m3, minX2p2, x3, minX1, maxX2m1, x3);
             exchangeData(maxX1m2, minX2p3, x3, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, maxX3m3, minX1, x2, minX3);
             exchangeData(maxX1m2, x2, maxX3m2, minX1p1, x2, minX3p1);
             exchangeData(maxX1m3, x2, maxX3m2, minX1, x2, minX3p1);
             exchangeData(maxX1m2, x2, maxX3m3, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, minX3p3, maxX1, x2, maxX3);
             exchangeData(minX1p2, x2, minX3p2, maxX1m1, x2, maxX3m1);
             exchangeData(minX1p3, x2, minX3p2, maxX1, x2, maxX3m1);
             exchangeData(minX1p2, x2, minX3p3, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, minX3p3, minX1, x2, maxX3);
             exchangeData(maxX1m2, x2, minX3p2, minX1p1, x2, maxX3m1);
             exchangeData(maxX1m3, x2, minX3p2, minX1, x2, maxX3m1);
             exchangeData(maxX1m2, x2, minX3p3, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, maxX3m3, maxX1, x2, minX3);
             exchangeData(minX1p2, x2, maxX3m2, maxX1m1, x2, minX3p1);
             exchangeData(minX1p3, x2, maxX3m2, maxX1, x2, minX3p1);
             exchangeData(minX1p2, x2, maxX3m3, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, maxX3m3, x1, minX2, minX3);
             exchangeData(x1, maxX2m2, maxX3m2, x1, minX2p1, minX3p1);
             exchangeData(x1, maxX2m3, maxX3m2, x1, minX2, minX3p1);
             exchangeData(x1, maxX2m2, maxX3m3, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, minX3p3, x1, maxX2, maxX3);
             exchangeData(x1, minX2p2, minX3p2, x1, maxX2m1, maxX3m1);
             exchangeData(x1, minX2p3, minX3p2, x1, maxX2, maxX3m1);
             exchangeData(x1, minX2p2, minX3p3, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, minX3p3, x1, minX2, maxX3);
             exchangeData(x1, maxX2m2, minX3p2, x1, minX2p1, maxX3m1);
             exchangeData(x1, maxX2m3, minX3p2, x1, minX2, maxX3m1);
             exchangeData(x1, maxX2m2, minX3p3, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, maxX3m3, x1, maxX2, minX3);
             exchangeData(x1, minX2p2, maxX3m2, x1, maxX2m1, minX3p1);
             exchangeData(x1, minX2p3, maxX3m2, x1, maxX2, minX3p1);
             exchangeData(x1, minX2p2, maxX3m3, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         exchangeData(minX1p3, minX2p3, maxX3m3, maxX1, maxX2, minX3);
         exchangeData(minX1p2, minX2p2, maxX3m2, maxX1m1, maxX2m1, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m2, maxX1, maxX2m1, minX3p1);
@@ -248,7 +250,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, maxX3m2, maxX1, maxX2, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m3, maxX1, maxX2m1, minX3);
         exchangeData(minX1p2, minX2p3, maxX3m3, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         exchangeData(maxX1m3, minX1p3, maxX3m3, minX1, maxX2, minX3);
         exchangeData(maxX1m2, minX1p2, maxX3m2, minX1p1, maxX2m1, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m2, minX1, maxX2m1, minX3p1);
@@ -257,7 +259,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX1p3, maxX3m2, minX1, maxX2, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m3, minX1, maxX2m1, minX3);
         exchangeData(maxX1m2, minX1p3, maxX3m3, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         exchangeData(minX1p3, maxX2m3, maxX3m3, maxX1, minX2, minX3);
         exchangeData(minX1p2, maxX2m2, maxX3m2, maxX1m1, minX2p1, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m2, maxX1, minX2p1, minX3p1);
@@ -266,7 +268,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, maxX3m2, maxX1, minX2, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m3, maxX1, minX2p1, minX3);
         exchangeData(minX1p2, maxX2m3, maxX3m3, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         exchangeData(maxX1m3, maxX2m3, maxX3m3, minX1, minX2, minX3);
         exchangeData(maxX1m2, maxX2m2, maxX3m2, minX1p1, minX2p1, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m2, minX1, minX2p1, minX3p1);
@@ -275,7 +277,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, maxX2m3, maxX3m2, minX1, minX2, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m3, minX1, minX2p1, minX3);
         exchangeData(maxX1m2, maxX2m3, maxX3m3, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         exchangeData(minX1p3, minX2p3, minX3p3, maxX1, maxX2, maxX3);
         exchangeData(minX1p2, minX2p2, minX3p2, maxX1m1, maxX2m1, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p2, maxX1, maxX2m1, maxX3m1);
@@ -284,7 +286,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, minX3p2, maxX1, maxX2, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p3, maxX1, maxX2m1, maxX3);
         exchangeData(minX1p2, minX2p3, minX3p3, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         exchangeData(maxX1m3, minX2p3, minX3p3, minX1, maxX2, maxX3);
         exchangeData(maxX1m2, minX2p2, minX3p2, minX1p1, maxX2m1, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p2, minX1, maxX2m1, maxX3m1);
@@ -293,7 +295,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX2p3, minX3p2, minX1, maxX2, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p3, minX1, maxX2m1, maxX3);
         exchangeData(maxX1m2, minX2p3, minX3p3, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         exchangeData(minX1p3, maxX2m3, minX3p3, maxX1, minX2, maxX3);
         exchangeData(minX1p2, maxX2m2, minX3p2, maxX1m1, minX2p1, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p2, maxX1, minX2p1, maxX3m1);
@@ -302,7 +304,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, minX3p2, maxX1, minX2, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p3, maxX1, minX2p1, maxX3);
         exchangeData(minX1p2, maxX2m3, minX3p3, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         exchangeData(maxX1m3, maxX2m3, minX3p3, minX1, minX2, maxX3);
         exchangeData(maxX1m2, maxX2m2, minX3p2, minX1p1, minX2p1, maxX3m1);
         exchangeData(maxX1m3, maxX2m2, minX3p2, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h
index 1f53ca70aecd3531c986edb8a3933e9d4c5c5ba7..21a37e3427747d0c813c34b6fcf14b18ff3e1a76 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h
@@ -58,29 +58,29 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
 
 	SPtr<EsoTwist3D> fFrom, hFrom, hFrom2;
     SPtr<EsoTwist3D> fTo, hTo, hTo2;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp
index 0b94f7b4a971462517db6dd07050942f4b8595c0..8334b93d21529a54fbe6b29be465d60d2c63e308 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp
@@ -50,6 +50,8 @@ ThreeDistributionsDoubleGhostLayerFullVectorConnector::ThreeDistributionsDoubleG
 //////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullVectorConnector::init()
 {
+    using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -60,37 +62,37 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::init()
    int anz = 3*27+1;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
@@ -104,6 +106,8 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillSendVectors()
 ////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -138,7 +142,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 
     int index = 0;
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, maxX1m3, x2, x3);
@@ -147,7 +151,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, minX1p3, x2, x3);
@@ -156,7 +160,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, maxX2m3, x3);
@@ -165,7 +169,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, minX2p3, x3);
@@ -175,7 +179,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, maxX3m3);
@@ -184,7 +188,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, minX3p3);
@@ -193,7 +197,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, maxX2m3, x3);
             fillData(sdata, index, maxX1m2, maxX2m2, x3);
@@ -202,7 +206,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, maxX2m3, x3);
             fillData(sdata, index, minX1p2, maxX2m2, x3);
@@ -211,7 +215,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, minX2p3, x3);
             fillData(sdata, index, minX1p2, minX2p2, x3);
@@ -220,70 +224,70 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, minX2p3, x3);
             fillData(sdata, index, maxX1m2, minX2p2, x3);
             fillData(sdata, index, maxX1m3, minX2p2, x3);
             fillData(sdata, index, maxX1m2, minX2p3, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, maxX3m3);
             fillData(sdata, index, maxX1m2, x2, maxX3m2);
             fillData(sdata, index, maxX1m3, x2, maxX3m2);
             fillData(sdata, index, maxX1m2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, minX3p3);
             fillData(sdata, index, minX1p2, x2, minX3p2);
             fillData(sdata, index, minX1p3, x2, minX3p2);
             fillData(sdata, index, minX1p2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, minX3p3);
             fillData(sdata, index, maxX1m2, x2, minX3p2);
             fillData(sdata, index, maxX1m3, x2, minX3p2);
             fillData(sdata, index, maxX1m2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, maxX3m3);
             fillData(sdata, index, minX1p2, x2, maxX3m2);
             fillData(sdata, index, minX1p3, x2, maxX3m2);
             fillData(sdata, index, minX1p2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, maxX3m3);
             fillData(sdata, index, x1, maxX2m2, maxX3m2);
             fillData(sdata, index, x1, maxX2m3, maxX3m2);
             fillData(sdata, index, x1, maxX2m2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, minX3p3);
             fillData(sdata, index, x1, minX2p2, minX3p2);
             fillData(sdata, index, x1, minX2p3, minX3p2);
             fillData(sdata, index, x1, minX2p2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, minX3p3);
             fillData(sdata, index, x1, maxX2m2, minX3p2);
             fillData(sdata, index, x1, maxX2m3, minX3p2);
             fillData(sdata, index, x1, maxX2m2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, maxX3m3);
             fillData(sdata, index, x1, minX2p2, maxX3m2);
             fillData(sdata, index, x1, minX2p3, maxX3m2);
             fillData(sdata, index, x1, minX2p2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         fillData(sdata, index, minX1p3, minX2p3, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p2, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m2);
@@ -292,7 +296,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p2, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m2);
@@ -301,7 +305,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m2, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m2);
@@ -310,7 +314,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m2, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m2);
@@ -319,7 +323,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         fillData(sdata, index, minX1p3, minX2p3, minX3p3);
         fillData(sdata, index, minX1p2, minX2p2, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p2);
@@ -328,7 +332,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p3);
         fillData(sdata, index, minX1p2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         fillData(sdata, index, maxX1m3, minX2p3, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p2, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p2);
@@ -337,7 +341,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX2p3, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         fillData(sdata, index, minX1p3, maxX2m3, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m2, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p2);
@@ -346,7 +350,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         fillData(sdata, index, maxX1m3, maxX2m3, minX3p3);
         fillData(sdata, index, maxX1m2, maxX2m2, minX3p2);
         fillData(sdata, index, maxX1m3, maxX2m2, minX3p2);
@@ -367,6 +371,8 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeReceiveVec
 ////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &rdata = receiver->getData();
 
     int index = 0;
@@ -400,7 +406,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
     int maxX3m2 = maxX3 - 2;
     //int maxX3m3 = maxX3 - 3;
 
-    if (sendDir == D3Q27System::DIR_M00) {
+    if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, minX1, x2, x3);
@@ -408,7 +414,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_P00) {
+    else if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, maxX1, x2, x3);
@@ -416,7 +422,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, minX2, x3);
@@ -424,7 +430,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, maxX2, x3);
@@ -432,7 +438,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, minX3);
@@ -440,7 +446,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, maxX3);
@@ -448,7 +454,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, minX2, x3);
             distributeData(rdata, index, minX1p1, minX2p1, x3);
@@ -456,7 +462,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, minX1p1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, minX2, x3);
             distributeData(rdata, index, maxX1m1, minX2p1, x3);
@@ -464,7 +470,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, maxX2, x3);
             distributeData(rdata, index, maxX1m1, maxX2m1, x3);
@@ -472,70 +478,70 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, maxX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, maxX2, x3);
             distributeData(rdata, index, minX1p1, maxX2m1, x3);
             distributeData(rdata, index, minX1, maxX2m1, x3);
             distributeData(rdata, index, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_M0M)
+    } else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, minX3);
             distributeData(rdata, index, minX1p1, x2, minX3p1);
             distributeData(rdata, index, minX1, x2, minX3p1);
             distributeData(rdata, index, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0P)
+    else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, maxX3);
             distributeData(rdata, index, maxX1m1, x2, maxX3m1);
             distributeData(rdata, index, maxX1, x2, maxX3m1);
             distributeData(rdata, index, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, maxX3);
             distributeData(rdata, index, minX1p1, x2, maxX3m1);
             distributeData(rdata, index, minX1, x2, maxX3m1);
             distributeData(rdata, index, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, minX3);
             distributeData(rdata, index, maxX1m1, x2, minX3p1);
             distributeData(rdata, index, maxX1, x2, minX3p1);
             distributeData(rdata, index, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, minX3);
             distributeData(rdata, index, x1, minX2p1, minX3p1);
             distributeData(rdata, index, x1, minX2, minX3p1);
             distributeData(rdata, index, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, maxX3);
             distributeData(rdata, index, x1, maxX2m1, maxX3m1);
             distributeData(rdata, index, x1, maxX2, maxX3m1);
             distributeData(rdata, index, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, maxX3);
             distributeData(rdata, index, x1, minX2p1, maxX3m1);
             distributeData(rdata, index, x1, minX2, maxX3m1);
             distributeData(rdata, index, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, minX3);
             distributeData(rdata, index, x1, maxX2m1, minX3p1);
             distributeData(rdata, index, x1, maxX2, minX3p1);
             distributeData(rdata, index, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_PPM) {
+    else if (sendDir == DIR_PPM) {
         distributeData(rdata, index, maxX1, maxX2, minX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3p1);
@@ -544,7 +550,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3);
         distributeData(rdata, index, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         distributeData(rdata, index, minX1, maxX2, minX3);
         distributeData(rdata, index, minX1p1, maxX2m1, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3p1);
@@ -553,7 +559,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3);
         distributeData(rdata, index, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         distributeData(rdata, index, maxX1, minX2, minX3);
         distributeData(rdata, index, maxX1m1, minX2p1, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3p1);
@@ -562,7 +568,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3);
         distributeData(rdata, index, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         distributeData(rdata, index, minX1, minX2, minX3);
         distributeData(rdata, index, minX1p1, minX2p1, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3p1);
@@ -571,7 +577,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, minX2, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3);
         distributeData(rdata, index, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         distributeData(rdata, index, maxX1, maxX2, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3m1);
@@ -580,7 +586,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         distributeData(rdata, index, minX1, maxX2, maxX3);
         distributeData(rdata, index, minX1p1, maxX2m1, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3m1);
@@ -589,7 +595,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3);
         distributeData(rdata, index, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         distributeData(rdata, index, maxX1, minX2, maxX3);
         distributeData(rdata, index, maxX1m1, minX2p1, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3m1);
@@ -598,7 +604,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3);
         distributeData(rdata, index, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MMP) {
+    } else if (sendDir == DIR_MMP) {
         distributeData(rdata, index, minX1, minX2, maxX3);
         distributeData(rdata, index, minX1p1, minX2p1, maxX3m1);
         distributeData(rdata, index, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h
index 408a8e79d8a22ae71f0f03d51205b6c01a391aae..e124251d8f8be21aa33ccb8dc91f7e9b40356827 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h
@@ -68,21 +68,21 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
    SPtr<EsoTwist3D>  hDis;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
 
    SPtr<EsoTwist3D> h2Dis;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h
index 6ccac29f41a297581b263164c3a2fc491022be00..3cb443a474d373552125ddd2626a4f797d040429 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h
@@ -55,29 +55,29 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
 
 	SPtr<EsoTwist3D> fFrom, hFrom, hFrom2;
     SPtr<EsoTwist3D> fTo, hTo, hTo2;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp
index 534076b7bfcb63386c75d6d8619b4b56bbd5c5ee..1b4f243eeccd39c8bdcc0ac3bf1c8b3510053d58 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp
@@ -50,6 +50,8 @@ ThreeDistributionsFullVectorConnector::ThreeDistributionsFullVectorConnector(SPt
 //////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsFullVectorConnector::init()
 {
+    using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -59,37 +61,37 @@ void ThreeDistributionsFullVectorConnector::init()
    int anz = 3*27;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h
index c37ff06984e83950ed4edbe03da0f38dc6ffe190..794ba2d01d8015b347e8a1712da943b82d80b83c 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h
@@ -62,21 +62,21 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
    SPtr<EsoTwist3D>  hDis;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
 
    SPtr<EsoTwist3D> h2Dis;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h b/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h
index b36da8850e958c72d519c85bf383c26a8880e5ee..f300f005e9e52e398b0d2131ad4cbba027170ede 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h
@@ -39,8 +39,8 @@
 #include "basics/transmitter/TbTransmitterLocal.h"
 #include <PointerDefinitions.h>
 
-using VectorTransmitter    = TbTransmitter<CbVector<LBMReal>>;
+using VectorTransmitter    = TbTransmitter<CbVector<real>>;
 using vector_type          = VectorTransmitter::value_type;
-using VectorTransmitterPtr = SPtr<TbTransmitter<CbVector<LBMReal>>>;
+using VectorTransmitterPtr = SPtr<TbTransmitter<CbVector<real>>>;
 
 #endif // TransmitterType_h__
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp
index b46ffebeb144569311272050893118f34e862398..121fef6b86040aab370e9ffd925bea0033d61446 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp
@@ -61,6 +61,8 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::sendVectors()
 //////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -92,7 +94,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     int maxX3m3 = maxX3 - 3;
 
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(maxX1m3, x2, x3, minX1, x2, x3);
@@ -101,7 +103,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(minX1p3, x2, x3, maxX1, x2, x3);
@@ -110,7 +112,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, maxX2m3, x3, x1, minX2, x3);
@@ -119,7 +121,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, minX2p3, x3, x1, maxX2, x3);
@@ -129,7 +131,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, maxX3m3, x1, x2, minX3);
@@ -138,7 +140,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, minX3p3, x1, x2, maxX3);
@@ -147,7 +149,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, maxX2m3, x3, minX1, minX2, x3);
             exchangeData(maxX1m2, maxX2m2, x3, minX1p1, minX2p1, x3);
@@ -156,7 +158,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, maxX2m3, x3, maxX1, minX2, x3);
             exchangeData(minX1p2, maxX2m2, x3, maxX1m1, minX2p1, x3);
@@ -165,7 +167,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, minX2p3, x3, maxX1, maxX2, x3);
             exchangeData(minX1p2, minX2p2, x3, maxX1m1, maxX2m1, x3);
@@ -174,70 +176,70 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, minX2p3, x3, minX1, maxX2, x3);
             exchangeData(maxX1m2, minX2p2, x3, minX1p1, maxX2m1, x3);
             exchangeData(maxX1m3, minX2p2, x3, minX1, maxX2m1, x3);
             exchangeData(maxX1m2, minX2p3, x3, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, maxX3m3, minX1, x2, minX3);
             exchangeData(maxX1m2, x2, maxX3m2, minX1p1, x2, minX3p1);
             exchangeData(maxX1m3, x2, maxX3m2, minX1, x2, minX3p1);
             exchangeData(maxX1m2, x2, maxX3m3, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, minX3p3, maxX1, x2, maxX3);
             exchangeData(minX1p2, x2, minX3p2, maxX1m1, x2, maxX3m1);
             exchangeData(minX1p3, x2, minX3p2, maxX1, x2, maxX3m1);
             exchangeData(minX1p2, x2, minX3p3, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, minX3p3, minX1, x2, maxX3);
             exchangeData(maxX1m2, x2, minX3p2, minX1p1, x2, maxX3m1);
             exchangeData(maxX1m3, x2, minX3p2, minX1, x2, maxX3m1);
             exchangeData(maxX1m2, x2, minX3p3, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, maxX3m3, maxX1, x2, minX3);
             exchangeData(minX1p2, x2, maxX3m2, maxX1m1, x2, minX3p1);
             exchangeData(minX1p3, x2, maxX3m2, maxX1, x2, minX3p1);
             exchangeData(minX1p2, x2, maxX3m3, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, maxX3m3, x1, minX2, minX3);
             exchangeData(x1, maxX2m2, maxX3m2, x1, minX2p1, minX3p1);
             exchangeData(x1, maxX2m3, maxX3m2, x1, minX2, minX3p1);
             exchangeData(x1, maxX2m2, maxX3m3, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, minX3p3, x1, maxX2, maxX3);
             exchangeData(x1, minX2p2, minX3p2, x1, maxX2m1, maxX3m1);
             exchangeData(x1, minX2p3, minX3p2, x1, maxX2, maxX3m1);
             exchangeData(x1, minX2p2, minX3p3, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, minX3p3, x1, minX2, maxX3);
             exchangeData(x1, maxX2m2, minX3p2, x1, minX2p1, maxX3m1);
             exchangeData(x1, maxX2m3, minX3p2, x1, minX2, maxX3m1);
             exchangeData(x1, maxX2m2, minX3p3, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, maxX3m3, x1, maxX2, minX3);
             exchangeData(x1, minX2p2, maxX3m2, x1, maxX2m1, minX3p1);
             exchangeData(x1, minX2p3, maxX3m2, x1, maxX2, minX3p1);
             exchangeData(x1, minX2p2, maxX3m3, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         exchangeData(minX1p3, minX2p3, maxX3m3, maxX1, maxX2, minX3);
         exchangeData(minX1p2, minX2p2, maxX3m2, maxX1m1, maxX2m1, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m2, maxX1, maxX2m1, minX3p1);
@@ -246,7 +248,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, maxX3m2, maxX1, maxX2, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m3, maxX1, maxX2m1, minX3);
         exchangeData(minX1p2, minX2p3, maxX3m3, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         exchangeData(maxX1m3, minX1p3, maxX3m3, minX1, maxX2, minX3);
         exchangeData(maxX1m2, minX1p2, maxX3m2, minX1p1, maxX2m1, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m2, minX1, maxX2m1, minX3p1);
@@ -255,7 +257,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX1p3, maxX3m2, minX1, maxX2, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m3, minX1, maxX2m1, minX3);
         exchangeData(maxX1m2, minX1p3, maxX3m3, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         exchangeData(minX1p3, maxX2m3, maxX3m3, maxX1, minX2, minX3);
         exchangeData(minX1p2, maxX2m2, maxX3m2, maxX1m1, minX2p1, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m2, maxX1, minX2p1, minX3p1);
@@ -264,7 +266,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, maxX3m2, maxX1, minX2, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m3, maxX1, minX2p1, minX3);
         exchangeData(minX1p2, maxX2m3, maxX3m3, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         exchangeData(maxX1m3, maxX2m3, maxX3m3, minX1, minX2, minX3);
         exchangeData(maxX1m2, maxX2m2, maxX3m2, minX1p1, minX2p1, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m2, minX1, minX2p1, minX3p1);
@@ -273,7 +275,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, maxX2m3, maxX3m2, minX1, minX2, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m3, minX1, minX2p1, minX3);
         exchangeData(maxX1m2, maxX2m3, maxX3m3, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         exchangeData(minX1p3, minX2p3, minX3p3, maxX1, maxX2, maxX3);
         exchangeData(minX1p2, minX2p2, minX3p2, maxX1m1, maxX2m1, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p2, maxX1, maxX2m1, maxX3m1);
@@ -282,7 +284,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, minX3p2, maxX1, maxX2, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p3, maxX1, maxX2m1, maxX3);
         exchangeData(minX1p2, minX2p3, minX3p3, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         exchangeData(maxX1m3, minX2p3, minX3p3, minX1, maxX2, maxX3);
         exchangeData(maxX1m2, minX2p2, minX3p2, minX1p1, maxX2m1, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p2, minX1, maxX2m1, maxX3m1);
@@ -291,7 +293,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX2p3, minX3p2, minX1, maxX2, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p3, minX1, maxX2m1, maxX3);
         exchangeData(maxX1m2, minX2p3, minX3p3, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         exchangeData(minX1p3, maxX2m3, minX3p3, maxX1, minX2, maxX3);
         exchangeData(minX1p2, maxX2m2, minX3p2, maxX1m1, minX2p1, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p2, maxX1, minX2p1, maxX3m1);
@@ -300,7 +302,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, minX3p2, maxX1, minX2, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p3, maxX1, minX2p1, maxX3);
         exchangeData(minX1p2, maxX2m3, minX3p3, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         exchangeData(maxX1m3, maxX2m3, minX3p3, minX1, minX2, maxX3);
         exchangeData(maxX1m2, maxX2m2, minX3p2, minX1p1, minX2p1, maxX3m1);
         exchangeData(maxX1m3, maxX2m2, minX3p2, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h
index bc431f9f3bbb16587df76355c395fff780137b22..bbd1c5a346ac50b08c78794ea3b00457ba4836b7 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h
@@ -58,21 +58,21 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
 	SPtr<EsoTwist3D> fFrom, hFrom;
     SPtr<EsoTwist3D> fTo, hTo;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp
index 9dc8a99deb20f8f49f40f7d2e7c8a0c66b687fcb..8f6b88898a9da1cfca9aee49ae4cb084ee54217a 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp
@@ -50,6 +50,8 @@ TwoDistributionsDoubleGhostLayerFullVectorConnector::TwoDistributionsDoubleGhost
 //////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullVectorConnector::init()
 {
+   using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -59,37 +61,37 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::init()
    int anz = 2*27+1;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
@@ -103,6 +105,8 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillSendVectors()
 ////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -137,7 +141,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 
     int index = 0;
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, maxX1m3, x2, x3);
@@ -146,7 +150,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, minX1p3, x2, x3);
@@ -155,7 +159,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, maxX2m3, x3);
@@ -164,7 +168,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, minX2p3, x3);
@@ -174,7 +178,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, maxX3m3);
@@ -183,7 +187,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, minX3p3);
@@ -192,7 +196,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, maxX2m3, x3);
             fillData(sdata, index, maxX1m2, maxX2m2, x3);
@@ -201,7 +205,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, maxX2m3, x3);
             fillData(sdata, index, minX1p2, maxX2m2, x3);
@@ -210,7 +214,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, minX2p3, x3);
             fillData(sdata, index, minX1p2, minX2p2, x3);
@@ -219,70 +223,70 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, minX2p3, x3);
             fillData(sdata, index, maxX1m2, minX2p2, x3);
             fillData(sdata, index, maxX1m3, minX2p2, x3);
             fillData(sdata, index, maxX1m2, minX2p3, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, maxX3m3);
             fillData(sdata, index, maxX1m2, x2, maxX3m2);
             fillData(sdata, index, maxX1m3, x2, maxX3m2);
             fillData(sdata, index, maxX1m2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, minX3p3);
             fillData(sdata, index, minX1p2, x2, minX3p2);
             fillData(sdata, index, minX1p3, x2, minX3p2);
             fillData(sdata, index, minX1p2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, minX3p3);
             fillData(sdata, index, maxX1m2, x2, minX3p2);
             fillData(sdata, index, maxX1m3, x2, minX3p2);
             fillData(sdata, index, maxX1m2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, maxX3m3);
             fillData(sdata, index, minX1p2, x2, maxX3m2);
             fillData(sdata, index, minX1p3, x2, maxX3m2);
             fillData(sdata, index, minX1p2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, maxX3m3);
             fillData(sdata, index, x1, maxX2m2, maxX3m2);
             fillData(sdata, index, x1, maxX2m3, maxX3m2);
             fillData(sdata, index, x1, maxX2m2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, minX3p3);
             fillData(sdata, index, x1, minX2p2, minX3p2);
             fillData(sdata, index, x1, minX2p3, minX3p2);
             fillData(sdata, index, x1, minX2p2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, minX3p3);
             fillData(sdata, index, x1, maxX2m2, minX3p2);
             fillData(sdata, index, x1, maxX2m3, minX3p2);
             fillData(sdata, index, x1, maxX2m2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, maxX3m3);
             fillData(sdata, index, x1, minX2p2, maxX3m2);
             fillData(sdata, index, x1, minX2p3, maxX3m2);
             fillData(sdata, index, x1, minX2p2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         fillData(sdata, index, minX1p3, minX2p3, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p2, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m2);
@@ -291,7 +295,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p2, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m2);
@@ -300,7 +304,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m2, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m2);
@@ -309,7 +313,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m2, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m2);
@@ -318,7 +322,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         fillData(sdata, index, minX1p3, minX2p3, minX3p3);
         fillData(sdata, index, minX1p2, minX2p2, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p2);
@@ -327,7 +331,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p3);
         fillData(sdata, index, minX1p2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         fillData(sdata, index, maxX1m3, minX2p3, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p2, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p2);
@@ -336,7 +340,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX2p3, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         fillData(sdata, index, minX1p3, maxX2m3, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m2, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p2);
@@ -345,7 +349,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         fillData(sdata, index, maxX1m3, maxX2m3, minX3p3);
         fillData(sdata, index, maxX1m2, maxX2m2, minX3p2);
         fillData(sdata, index, maxX1m3, maxX2m2, minX3p2);
@@ -366,6 +370,8 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeReceiveVecto
 ////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &rdata = receiver->getData();
 
     int index = 0;
@@ -399,7 +405,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
     int maxX3m2 = maxX3 - 2;
     //int maxX3m3 = maxX3 - 3;
 
-    if (sendDir == D3Q27System::DIR_M00) {
+    if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, minX1, x2, x3);
@@ -407,7 +413,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_P00) {
+    else if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, maxX1, x2, x3);
@@ -415,7 +421,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, minX2, x3);
@@ -423,7 +429,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, maxX2, x3);
@@ -431,7 +437,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, minX3);
@@ -439,7 +445,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, maxX3);
@@ -447,7 +453,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, minX2, x3);
             distributeData(rdata, index, minX1p1, minX2p1, x3);
@@ -455,7 +461,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, minX1p1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, minX2, x3);
             distributeData(rdata, index, maxX1m1, minX2p1, x3);
@@ -463,7 +469,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, maxX2, x3);
             distributeData(rdata, index, maxX1m1, maxX2m1, x3);
@@ -471,70 +477,70 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, maxX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, maxX2, x3);
             distributeData(rdata, index, minX1p1, maxX2m1, x3);
             distributeData(rdata, index, minX1, maxX2m1, x3);
             distributeData(rdata, index, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_M0M)
+    } else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, minX3);
             distributeData(rdata, index, minX1p1, x2, minX3p1);
             distributeData(rdata, index, minX1, x2, minX3p1);
             distributeData(rdata, index, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0P)
+    else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, maxX3);
             distributeData(rdata, index, maxX1m1, x2, maxX3m1);
             distributeData(rdata, index, maxX1, x2, maxX3m1);
             distributeData(rdata, index, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, maxX3);
             distributeData(rdata, index, minX1p1, x2, maxX3m1);
             distributeData(rdata, index, minX1, x2, maxX3m1);
             distributeData(rdata, index, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, minX3);
             distributeData(rdata, index, maxX1m1, x2, minX3p1);
             distributeData(rdata, index, maxX1, x2, minX3p1);
             distributeData(rdata, index, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, minX3);
             distributeData(rdata, index, x1, minX2p1, minX3p1);
             distributeData(rdata, index, x1, minX2, minX3p1);
             distributeData(rdata, index, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, maxX3);
             distributeData(rdata, index, x1, maxX2m1, maxX3m1);
             distributeData(rdata, index, x1, maxX2, maxX3m1);
             distributeData(rdata, index, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, maxX3);
             distributeData(rdata, index, x1, minX2p1, maxX3m1);
             distributeData(rdata, index, x1, minX2, maxX3m1);
             distributeData(rdata, index, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, minX3);
             distributeData(rdata, index, x1, maxX2m1, minX3p1);
             distributeData(rdata, index, x1, maxX2, minX3p1);
             distributeData(rdata, index, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_PPM) {
+    else if (sendDir == DIR_PPM) {
         distributeData(rdata, index, maxX1, maxX2, minX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3p1);
@@ -543,7 +549,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3);
         distributeData(rdata, index, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         distributeData(rdata, index, minX1, maxX2, minX3);
         distributeData(rdata, index, minX1p1, maxX2m1, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3p1);
@@ -552,7 +558,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3);
         distributeData(rdata, index, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         distributeData(rdata, index, maxX1, minX2, minX3);
         distributeData(rdata, index, maxX1m1, minX2p1, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3p1);
@@ -561,7 +567,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3);
         distributeData(rdata, index, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         distributeData(rdata, index, minX1, minX2, minX3);
         distributeData(rdata, index, minX1p1, minX2p1, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3p1);
@@ -570,7 +576,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, minX2, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3);
         distributeData(rdata, index, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         distributeData(rdata, index, maxX1, maxX2, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3m1);
@@ -579,7 +585,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         distributeData(rdata, index, minX1, maxX2, maxX3);
         distributeData(rdata, index, minX1p1, maxX2m1, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3m1);
@@ -588,7 +594,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3);
         distributeData(rdata, index, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         distributeData(rdata, index, maxX1, minX2, maxX3);
         distributeData(rdata, index, maxX1m1, minX2p1, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3m1);
@@ -597,7 +603,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3);
         distributeData(rdata, index, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MMP) {
+    } else if (sendDir == DIR_MMP) {
         distributeData(rdata, index, minX1, minX2, maxX3);
         distributeData(rdata, index, minX1p1, minX2p1, maxX3m1);
         distributeData(rdata, index, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h
index 508c9e90a2bced8560dcda6098d0fb4aea8b4d9a..d5769c726eda127ca603d6984744274abaf2edae 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h
@@ -68,15 +68,15 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
    SPtr<EsoTwist3D>  hDis;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h
index fbcfd9830db4e4c08d222471bbdb65e8524eaed5..625b8f6e3292cf4f56eab91536e5e9c8069b238d 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h
@@ -55,21 +55,21 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
 	SPtr<EsoTwist3D>  fFrom, hFrom;
 	SPtr<EsoTwist3D>  fTo, hTo;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp
index bab931d93dd99cf89f4517159cef1d6efc000eff..7987c2f6c8af52fbf897ff6bbcee47add3fc0056 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp
@@ -50,6 +50,8 @@ TwoDistributionsFullVectorConnector::TwoDistributionsFullVectorConnector(SPtr<Bl
 //////////////////////////////////////////////////////////////////////////
 void TwoDistributionsFullVectorConnector::init()
 {
+   using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -58,37 +60,37 @@ void TwoDistributionsFullVectorConnector::init()
    int anz = 2*27;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h
index 406acb5b8707609811d35da46034db4cfec7c9c3..b2cb384d652273aee82c992c50d4df9b1e46a4e9 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h
@@ -62,15 +62,15 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
 
    SPtr<EsoTwist3D>  hDis;
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp
index 0585947928e4fdc626659ed1a1d0e956fdd62de6..07e62e78a57fa8817ec8f4b7c20bce693697788d 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp
@@ -4,66 +4,66 @@
 
 D3Q27EsoTwist3DSoA::D3Q27EsoTwist3DSoA() = default;
 //////////////////////////////////////////////////////////////////////////
-D3Q27EsoTwist3DSoA::D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, LBMReal value)
+D3Q27EsoTwist3DSoA::D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, real value)
 {
     this->NX1 = nx1;
     this->NX2 = nx2;
     this->NX3 = nx3;
 
-    d.E = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.W = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.N = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.S = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.T = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.B = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.NE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.SW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.SE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.NW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TN = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BS = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BN = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TS = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TNE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TNW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TSE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TSW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BNE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BNW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BSE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BSW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.E = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.W = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.N = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.S = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.T = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.B = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.NE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.SW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.SE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.NW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TN = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BS = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BN = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TS = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TNE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TNW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TSE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TSW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BNE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BNW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BSE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BSW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
     d.REST =
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx1, nx2, nx3, value));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx1, nx2, nx3, value));
 }
 //////////////////////////////////////////////////////////////////////////
 D3Q27EsoTwist3DSoA::~D3Q27EsoTwist3DSoA() = default;
@@ -85,114 +85,120 @@ void D3Q27EsoTwist3DSoA::swap()
     std::swap(d.TSW, d.BNE);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::getDistribution(real *const f, size_t x1, size_t x2, size_t x3)
 {
+    using namespace vf::lbm::dir;
+
     size_t x1p = x1 + 1;
     size_t x2p = x2 + 1;
     size_t x3p = x3 + 1;
 
-    f[D3Q27System::DIR_P00]   = (*d.E)(x1, x2, x3);
-    f[D3Q27System::DIR_0P0]   = (*d.N)(x1, x2, x3);
-    f[D3Q27System::DIR_00P]   = (*d.T)(x1, x2, x3);
-    f[D3Q27System::DIR_PP0]  = (*d.NE)(x1, x2, x3);
-    f[D3Q27System::DIR_MP0]  = (*d.NW)(x1p, x2, x3);
-    f[D3Q27System::DIR_P0P]  = (*d.TE)(x1, x2, x3);
-    f[D3Q27System::DIR_M0P]  = (*d.TW)(x1p, x2, x3);
-    f[D3Q27System::DIR_0PP]  = (*d.TN)(x1, x2, x3);
-    f[D3Q27System::DIR_0MP]  = (*d.TS)(x1, x2p, x3);
-    f[D3Q27System::DIR_PPP] = (*d.TNE)(x1, x2, x3);
-    f[D3Q27System::DIR_MPP] = (*d.TNW)(x1p, x2, x3);
-    f[D3Q27System::DIR_PMP] = (*d.TSE)(x1, x2p, x3);
-    f[D3Q27System::DIR_MMP] = (*d.TSW)(x1p, x2p, x3);
+    f[DIR_P00]   = (*d.E)(x1, x2, x3);
+    f[DIR_0P0]   = (*d.N)(x1, x2, x3);
+    f[DIR_00P]   = (*d.T)(x1, x2, x3);
+    f[DIR_PP0]  = (*d.NE)(x1, x2, x3);
+    f[DIR_MP0]  = (*d.NW)(x1p, x2, x3);
+    f[DIR_P0P]  = (*d.TE)(x1, x2, x3);
+    f[DIR_M0P]  = (*d.TW)(x1p, x2, x3);
+    f[DIR_0PP]  = (*d.TN)(x1, x2, x3);
+    f[DIR_0MP]  = (*d.TS)(x1, x2p, x3);
+    f[DIR_PPP] = (*d.TNE)(x1, x2, x3);
+    f[DIR_MPP] = (*d.TNW)(x1p, x2, x3);
+    f[DIR_PMP] = (*d.TSE)(x1, x2p, x3);
+    f[DIR_MMP] = (*d.TSW)(x1p, x2p, x3);
 
-    f[D3Q27System::DIR_M00]   = (*d.W)(x1p, x2, x3);
-    f[D3Q27System::DIR_0M0]   = (*d.S)(x1, x2p, x3);
-    f[D3Q27System::DIR_00M]   = (*d.B)(x1, x2, x3p);
-    f[D3Q27System::DIR_MM0]  = (*d.SW)(x1p, x2p, x3);
-    f[D3Q27System::DIR_PM0]  = (*d.SE)(x1, x2p, x3);
-    f[D3Q27System::DIR_M0M]  = (*d.BW)(x1p, x2, x3p);
-    f[D3Q27System::DIR_P0M]  = (*d.BE)(x1, x2, x3p);
-    f[D3Q27System::DIR_0MM]  = (*d.BS)(x1, x2p, x3p);
-    f[D3Q27System::DIR_0PM]  = (*d.BN)(x1, x2, x3p);
-    f[D3Q27System::DIR_MMM] = (*d.BSW)(x1p, x2p, x3p);
-    f[D3Q27System::DIR_PMM] = (*d.BSE)(x1, x2p, x3p);
-    f[D3Q27System::DIR_MPM] = (*d.BNW)(x1p, x2, x3p);
-    f[D3Q27System::DIR_PPM] = (*d.BNE)(x1, x2, x3p);
+    f[DIR_M00]   = (*d.W)(x1p, x2, x3);
+    f[DIR_0M0]   = (*d.S)(x1, x2p, x3);
+    f[DIR_00M]   = (*d.B)(x1, x2, x3p);
+    f[DIR_MM0]  = (*d.SW)(x1p, x2p, x3);
+    f[DIR_PM0]  = (*d.SE)(x1, x2p, x3);
+    f[DIR_M0M]  = (*d.BW)(x1p, x2, x3p);
+    f[DIR_P0M]  = (*d.BE)(x1, x2, x3p);
+    f[DIR_0MM]  = (*d.BS)(x1, x2p, x3p);
+    f[DIR_0PM]  = (*d.BN)(x1, x2, x3p);
+    f[DIR_MMM] = (*d.BSW)(x1p, x2p, x3p);
+    f[DIR_PMM] = (*d.BSE)(x1, x2p, x3p);
+    f[DIR_MPM] = (*d.BNW)(x1p, x2, x3p);
+    f[DIR_PPM] = (*d.BNE)(x1, x2, x3p);
 
-    f[D3Q27System::DIR_000] = (*d.REST)(x1, x2, x3);
+    f[DIR_000] = (*d.REST)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::setDistribution(const real *const f, size_t x1, size_t x2, size_t x3)
 {
+    using namespace vf::lbm::dir;
+
     size_t x1p = x1 + 1;
     size_t x2p = x2 + 1;
     size_t x3p = x3 + 1;
 
-    (*d.E)(x1, x2, x3)     = f[D3Q27System::INV_P00];
-    (*d.N)(x1, x2, x3)     = f[D3Q27System::INV_0P0];
-    (*d.T)(x1, x2, x3)     = f[D3Q27System::INV_00P];
-    (*d.NE)(x1, x2, x3)    = f[D3Q27System::INV_PP0];
-    (*d.NW)(x1p, x2, x3)   = f[D3Q27System::INV_MP0];
-    (*d.TE)(x1, x2, x3)    = f[D3Q27System::INV_P0P];
-    (*d.TW)(x1p, x2, x3)   = f[D3Q27System::INV_M0P];
-    (*d.TN)(x1, x2, x3)    = f[D3Q27System::INV_0PP];
-    (*d.TS)(x1, x2p, x3)   = f[D3Q27System::INV_0MP];
-    (*d.TNE)(x1, x2, x3)   = f[D3Q27System::INV_PPP];
-    (*d.TNW)(x1p, x2, x3)  = f[D3Q27System::INV_MPP];
-    (*d.TSE)(x1, x2p, x3)  = f[D3Q27System::INV_PMP];
-    (*d.TSW)(x1p, x2p, x3) = f[D3Q27System::INV_MMP];
+    (*d.E)(x1, x2, x3)     = f[INV_P00];
+    (*d.N)(x1, x2, x3)     = f[INV_0P0];
+    (*d.T)(x1, x2, x3)     = f[INV_00P];
+    (*d.NE)(x1, x2, x3)    = f[INV_PP0];
+    (*d.NW)(x1p, x2, x3)   = f[INV_MP0];
+    (*d.TE)(x1, x2, x3)    = f[INV_P0P];
+    (*d.TW)(x1p, x2, x3)   = f[INV_M0P];
+    (*d.TN)(x1, x2, x3)    = f[INV_0PP];
+    (*d.TS)(x1, x2p, x3)   = f[INV_0MP];
+    (*d.TNE)(x1, x2, x3)   = f[INV_PPP];
+    (*d.TNW)(x1p, x2, x3)  = f[INV_MPP];
+    (*d.TSE)(x1, x2p, x3)  = f[INV_PMP];
+    (*d.TSW)(x1p, x2p, x3) = f[INV_MMP];
 
-    (*d.W)(x1p, x2, x3)     = f[D3Q27System::INV_M00];
-    (*d.S)(x1, x2p, x3)     = f[D3Q27System::INV_0M0];
-    (*d.B)(x1, x2, x3p)     = f[D3Q27System::INV_00M];
-    (*d.SW)(x1p, x2p, x3)   = f[D3Q27System::INV_MM0];
-    (*d.SE)(x1, x2p, x3)    = f[D3Q27System::INV_PM0];
-    (*d.BW)(x1p, x2, x3p)   = f[D3Q27System::INV_M0M];
-    (*d.BE)(x1, x2, x3p)    = f[D3Q27System::INV_P0M];
-    (*d.BS)(x1, x2p, x3p)   = f[D3Q27System::INV_0MM];
-    (*d.BN)(x1, x2, x3p)    = f[D3Q27System::INV_0PM];
-    (*d.BSW)(x1p, x2p, x3p) = f[D3Q27System::INV_MMM];
-    (*d.BSE)(x1, x2p, x3p)  = f[D3Q27System::INV_PMM];
-    (*d.BNW)(x1p, x2, x3p)  = f[D3Q27System::INV_MPM];
-    (*d.BNE)(x1, x2, x3p)   = f[D3Q27System::INV_PPM];
+    (*d.W)(x1p, x2, x3)     = f[INV_M00];
+    (*d.S)(x1, x2p, x3)     = f[INV_0M0];
+    (*d.B)(x1, x2, x3p)     = f[INV_00M];
+    (*d.SW)(x1p, x2p, x3)   = f[INV_MM0];
+    (*d.SE)(x1, x2p, x3)    = f[INV_PM0];
+    (*d.BW)(x1p, x2, x3p)   = f[INV_M0M];
+    (*d.BE)(x1, x2, x3p)    = f[INV_P0M];
+    (*d.BS)(x1, x2p, x3p)   = f[INV_0MM];
+    (*d.BN)(x1, x2, x3p)    = f[INV_0PM];
+    (*d.BSW)(x1p, x2p, x3p) = f[INV_MMM];
+    (*d.BSE)(x1, x2p, x3p)  = f[INV_PMM];
+    (*d.BNW)(x1p, x2, x3p)  = f[INV_MPM];
+    (*d.BNE)(x1, x2, x3p)   = f[INV_PPM];
 
-    (*d.REST)(x1, x2, x3) = f[D3Q27System::DIR_000];
+    (*d.REST)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3)
 {
-    f[D3Q27System::INV_P00]   = (*d.E)(x1, x2, x3);
-    f[D3Q27System::INV_0P0]   = (*d.N)(x1, x2, x3);
-    f[D3Q27System::INV_00P]   = (*d.T)(x1, x2, x3);
-    f[D3Q27System::INV_PP0]  = (*d.NE)(x1, x2, x3);
-    f[D3Q27System::INV_MP0]  = (*d.NW)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_P0P]  = (*d.TE)(x1, x2, x3);
-    f[D3Q27System::INV_M0P]  = (*d.TW)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_0PP]  = (*d.TN)(x1, x2, x3);
-    f[D3Q27System::INV_0MP]  = (*d.TS)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_PPP] = (*d.TNE)(x1, x2, x3);
-    f[D3Q27System::INV_MPP] = (*d.TNW)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_PMP] = (*d.TSE)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_MMP] = (*d.TSW)(x1 + 1, x2 + 1, x3);
+    using namespace vf::lbm::dir;
+
+    f[INV_P00]   = (*d.E)(x1, x2, x3);
+    f[INV_0P0]   = (*d.N)(x1, x2, x3);
+    f[INV_00P]   = (*d.T)(x1, x2, x3);
+    f[INV_PP0]  = (*d.NE)(x1, x2, x3);
+    f[INV_MP0]  = (*d.NW)(x1 + 1, x2, x3);
+    f[INV_P0P]  = (*d.TE)(x1, x2, x3);
+    f[INV_M0P]  = (*d.TW)(x1 + 1, x2, x3);
+    f[INV_0PP]  = (*d.TN)(x1, x2, x3);
+    f[INV_0MP]  = (*d.TS)(x1, x2 + 1, x3);
+    f[INV_PPP] = (*d.TNE)(x1, x2, x3);
+    f[INV_MPP] = (*d.TNW)(x1 + 1, x2, x3);
+    f[INV_PMP] = (*d.TSE)(x1, x2 + 1, x3);
+    f[INV_MMP] = (*d.TSW)(x1 + 1, x2 + 1, x3);
 
-    f[D3Q27System::INV_M00]   = (*d.W)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_0M0]   = (*d.S)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_00M]   = (*d.B)(x1, x2, x3 + 1);
-    f[D3Q27System::INV_MM0]  = (*d.SW)(x1 + 1, x2 + 1, x3);
-    f[D3Q27System::INV_PM0]  = (*d.SE)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_M0M]  = (*d.BW)(x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_P0M]  = (*d.BE)(x1, x2, x3 + 1);
-    f[D3Q27System::INV_0MM]  = (*d.BS)(x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_0PM]  = (*d.BN)(x1, x2, x3 + 1);
-    f[D3Q27System::INV_MMM] = (*d.BSW)(x1 + 1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_PMM] = (*d.BSE)(x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_MPM] = (*d.BNW)(x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_PPM] = (*d.BNE)(x1, x2, x3 + 1);
+    f[INV_M00]   = (*d.W)(x1 + 1, x2, x3);
+    f[INV_0M0]   = (*d.S)(x1, x2 + 1, x3);
+    f[INV_00M]   = (*d.B)(x1, x2, x3 + 1);
+    f[INV_MM0]  = (*d.SW)(x1 + 1, x2 + 1, x3);
+    f[INV_PM0]  = (*d.SE)(x1, x2 + 1, x3);
+    f[INV_M0M]  = (*d.BW)(x1 + 1, x2, x3 + 1);
+    f[INV_P0M]  = (*d.BE)(x1, x2, x3 + 1);
+    f[INV_0MM]  = (*d.BS)(x1, x2 + 1, x3 + 1);
+    f[INV_0PM]  = (*d.BN)(x1, x2, x3 + 1);
+    f[INV_MMM] = (*d.BSW)(x1 + 1, x2 + 1, x3 + 1);
+    f[INV_PMM] = (*d.BSE)(x1, x2 + 1, x3 + 1);
+    f[INV_MPM] = (*d.BNW)(x1 + 1, x2, x3 + 1);
+    f[INV_PPM] = (*d.BNE)(x1, x2, x3 + 1);
 
-    f[D3Q27System::DIR_000] = (*d.REST)(x1, x2, x3);
+    f[DIR_000] = (*d.REST)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3)
 {
     //(*this->localDistributions)(D3Q27System::ET_E,x1,  x2,  x3) = f[D3Q27System::DIR_P00];
     //(*this->localDistributions)(D3Q27System::ET_N,x1,  x2,  x3) = f[D3Q27System::DIR_0P0];
@@ -225,7 +231,7 @@ void D3Q27EsoTwist3DSoA::setDistributionInv(const LBMReal *const f, size_t x1, s
     //(*this->zeroDistributions)(x1,x2,x3) = f[D3Q27System::REST];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSoA::setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                      unsigned long int direction)
 {
     // bool directionFlag = false;
@@ -288,7 +294,7 @@ void D3Q27EsoTwist3DSoA::setDistributionForDirection(const LBMReal *const f, siz
     //#endif //DEBUG
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction)
+void D3Q27EsoTwist3DSoA::setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction)
 {
     // switch (direction)
     //{
@@ -378,7 +384,7 @@ void D3Q27EsoTwist3DSoA::setDistributionForDirection(LBMReal f, size_t x1, size_
     //}
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                         unsigned long int direction)
 {
     //   bool directionFlag = false;
@@ -444,7 +450,7 @@ void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(const LBMReal *const f,
     //#endif //DEBUG
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                         unsigned long int direction)
 {
     // switch (direction)
@@ -535,7 +541,7 @@ void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(LBMReal f, size_t x1, si
     //}
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal D3Q27EsoTwist3DSoA::getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/,
+real D3Q27EsoTwist3DSoA::getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/,
                                                            int /*direction*/)
 {
     // switch (direction)
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h
index a3eb85e378051871a018066a5aabe58969790b73..204e6fe15f69a387c289ae8c60f63d59ef62ddc3 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h
@@ -9,67 +9,67 @@
 //#include <boost/serialization/base_object.hpp>
 
 struct Distributions {
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr E;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr W;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr N;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr S;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr T;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr B;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr NE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr SW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr SE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr NW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TN;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BS;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BN;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TS;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TNE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TNW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TSE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TSW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BNE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BNW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BSE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BSW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr REST;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr E;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr W;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr N;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr S;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr T;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr B;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr NE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr SW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr SE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr NW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TN;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BS;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BN;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TS;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TNE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TNW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TSE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TSW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BNE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BNW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BSE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BSW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr REST;
 };
 
 class D3Q27EsoTwist3DSoA : public EsoTwist3D
 {
 public:
     D3Q27EsoTwist3DSoA();
-    D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, LBMReal value);
+    D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, real value);
     //////////////////////////////////////////////////////////////////////////
     ~D3Q27EsoTwist3DSoA() override;
     //////////////////////////////////////////////////////////////////////////
     void swap() override;
     //////////////////////////////////////////////////////////////////////////
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override;
     ////////////////////////////////////////////////////////////////////////
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override;
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
     size_t getNX1() const override;
     //////////////////////////////////////////////////////////////////////////
@@ -79,7 +79,7 @@ public:
     //////////////////////////////////////////////////////////////////////////
     Distributions getDistributions();
     //////////////////////////////////////////////////////////////////////////
-    void getDistributionAfterLastStep(LBMReal *const f, size_t x1, size_t x2, size_t x3);
+    void getDistributionAfterLastStep(real *const f, size_t x1, size_t x2, size_t x3);
 
 protected:
     Distributions d;
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp
index 684238baad1752ab3fad051666da459fd8e11095..4660e7b8397482683d67e6ba74b466b1857df10c 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp
@@ -36,298 +36,310 @@
 
 D3Q27EsoTwist3DSplittedVector::D3Q27EsoTwist3DSplittedVector() = default;
 //////////////////////////////////////////////////////////////////////////
-D3Q27EsoTwist3DSplittedVector::D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, LBMReal value)
+D3Q27EsoTwist3DSplittedVector::D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, real value)
 {
     this->NX1 = nx1;
     this->NX2 = nx2;
     this->NX3 = nx3;
 
     this->localDistributions =
-        std::make_shared<CbArray4D<LBMReal, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
+        std::make_shared<CbArray4D<real, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
     this->nonLocalDistributions =
-        std::make_shared<CbArray4D<LBMReal, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
+        std::make_shared<CbArray4D<real, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
 
-    this->zeroDistributions = std::make_shared<CbArray3D<LBMReal, IndexerX3X2X1>>(nx1, nx2, nx3, value);
+    this->zeroDistributions = std::make_shared<CbArray3D<real, IndexerX3X2X1>>(nx1, nx2, nx3, value);
 }
 //////////////////////////////////////////////////////////////////////////
 D3Q27EsoTwist3DSplittedVector::~D3Q27EsoTwist3DSplittedVector() = default;
 //////////////////////////////////////////////////////////////////////////
 void D3Q27EsoTwist3DSplittedVector::swap() { std::swap(this->localDistributions, this->nonLocalDistributions); }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::getDistribution(real *const f, size_t x1, size_t x2, size_t x3)
 {
-    f[D3Q27System::DIR_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-    f[D3Q27System::DIR_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-    f[D3Q27System::DIR_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-    f[D3Q27System::DIR_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-    f[D3Q27System::DIR_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-    f[D3Q27System::DIR_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-    f[D3Q27System::DIR_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-    f[D3Q27System::DIR_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
+    using namespace vf::lbm::dir;
 
-    f[D3Q27System::DIR_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-    f[D3Q27System::DIR_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-    f[D3Q27System::DIR_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::DIR_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-    f[D3Q27System::DIR_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::DIR_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-    f[D3Q27System::DIR_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-    f[D3Q27System::DIR_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::DIR_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::DIR_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
+    f[DIR_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+    f[DIR_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+    f[DIR_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+    f[DIR_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+    f[DIR_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
+    f[DIR_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+    f[DIR_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
+    f[DIR_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+    f[DIR_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
+    f[DIR_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+    f[DIR_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
+    f[DIR_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
+    f[DIR_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
 
-    f[D3Q27System::DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
+    f[DIR_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
+    f[DIR_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
+    f[DIR_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
+    f[DIR_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
+    f[DIR_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
+    f[DIR_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
+    f[DIR_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
+    f[DIR_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
+    f[DIR_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
+    f[DIR_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
+    f[DIR_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
+    f[DIR_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
+    f[DIR_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
+
+    f[DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::setDistribution(const real *const f, size_t x1, size_t x2, size_t x3)
 {
-    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[D3Q27System::INV_P00];
-    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[D3Q27System::INV_0P0];
-    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[D3Q27System::INV_00P];
-    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[D3Q27System::INV_PP0];
-    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[D3Q27System::INV_MP0];
-    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[D3Q27System::INV_P0P];
-    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[D3Q27System::INV_M0P];
-    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[D3Q27System::INV_0PP];
-    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[D3Q27System::INV_0MP];
-    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[D3Q27System::INV_PPP];
-    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[D3Q27System::INV_MPP];
-    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[D3Q27System::INV_PMP];
-    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::INV_MMP];
+    using namespace vf::lbm::dir;
+
+    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[INV_P00];
+    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[INV_0P0];
+    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[INV_00P];
+    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[INV_PP0];
+    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[INV_MP0];
+    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[INV_P0P];
+    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[INV_M0P];
+    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[INV_0PP];
+    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[INV_0MP];
+    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[INV_PPP];
+    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[INV_MPP];
+    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[INV_PMP];
+    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[INV_MMP];
 
-    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[D3Q27System::INV_M00];
-    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[D3Q27System::INV_0M0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[D3Q27System::INV_00M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[D3Q27System::INV_MM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[D3Q27System::INV_PM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[D3Q27System::INV_M0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[D3Q27System::INV_P0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[D3Q27System::INV_0MM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[D3Q27System::INV_0PM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::INV_MMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[D3Q27System::INV_PMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[D3Q27System::INV_MPM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[D3Q27System::INV_PPM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[INV_M00];
+    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[INV_0M0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[INV_00M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[INV_MM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[INV_PM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[INV_M0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[INV_P0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[INV_0MM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[INV_0PM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[INV_MMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[INV_PMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[INV_MPM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[INV_PPM];
 
-    (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+    (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3)
 {
-    f[D3Q27System::INV_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-    f[D3Q27System::INV_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-    f[D3Q27System::INV_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-    f[D3Q27System::INV_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-    f[D3Q27System::INV_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-    f[D3Q27System::INV_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-    f[D3Q27System::INV_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-    f[D3Q27System::INV_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-    f[D3Q27System::INV_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-    f[D3Q27System::INV_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-    f[D3Q27System::INV_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-    f[D3Q27System::INV_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-    f[D3Q27System::INV_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
+    using namespace vf::lbm::dir;
+
+    f[INV_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+    f[INV_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+    f[INV_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+    f[INV_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+    f[INV_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
+    f[INV_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+    f[INV_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
+    f[INV_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+    f[INV_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
+    f[INV_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+    f[INV_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
+    f[INV_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
+    f[INV_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
 
-    f[D3Q27System::INV_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-    f[D3Q27System::INV_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-    f[D3Q27System::INV_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-    f[D3Q27System::INV_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-    f[D3Q27System::INV_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-    f[D3Q27System::INV_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-    f[D3Q27System::INV_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-    f[D3Q27System::INV_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
+    f[INV_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
+    f[INV_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
+    f[INV_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
+    f[INV_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
+    f[INV_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
+    f[INV_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
+    f[INV_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
+    f[INV_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
+    f[INV_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
+    f[INV_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
+    f[INV_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
+    f[INV_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
+    f[INV_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
 
-    f[D3Q27System::DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
+    f[DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3)
 {
-    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[D3Q27System::DIR_P00];
-    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[D3Q27System::DIR_0P0];
-    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[D3Q27System::DIR_00P];
-    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[D3Q27System::DIR_PP0];
-    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[D3Q27System::DIR_MP0];
-    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[D3Q27System::DIR_P0P];
-    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[D3Q27System::DIR_M0P];
-    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[D3Q27System::DIR_0PP];
-    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[D3Q27System::DIR_0MP];
-    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[D3Q27System::DIR_PPP];
-    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[D3Q27System::DIR_MPP];
-    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[D3Q27System::DIR_PMP];
-    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_MMP];
+    using namespace vf::lbm::dir;
 
-    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[D3Q27System::DIR_M00];
-    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[D3Q27System::DIR_0M0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[D3Q27System::DIR_00M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[D3Q27System::DIR_MM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[D3Q27System::DIR_PM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[D3Q27System::DIR_M0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[D3Q27System::DIR_P0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[D3Q27System::DIR_0MM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[D3Q27System::DIR_0PM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_MMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[D3Q27System::DIR_PMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[D3Q27System::DIR_MPM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[D3Q27System::DIR_PPM];
+    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[DIR_P00];
+    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[DIR_0P0];
+    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[DIR_00P];
+    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[DIR_PP0];
+    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[DIR_MP0];
+    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[DIR_P0P];
+    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[DIR_M0P];
+    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[DIR_0PP];
+    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[DIR_0MP];
+    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[DIR_PPP];
+    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[DIR_MPP];
+    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[DIR_PMP];
+    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[DIR_MMP];
 
-    (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[DIR_M00];
+    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[DIR_0M0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[DIR_00M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[DIR_MM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[DIR_PM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[DIR_M0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[DIR_P0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[DIR_0MM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[DIR_0PM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[DIR_MMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[DIR_PMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[DIR_MPM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[DIR_PPM];
+
+    (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                                 unsigned long int direction)
 {
+    using namespace vf::lbm::dir;
+
     if ((direction & EsoTwistD3Q27System::etE) == EsoTwistD3Q27System::etE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[D3Q27System::DIR_P00];
+        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[DIR_P00];
     if ((direction & EsoTwistD3Q27System::etW) == EsoTwistD3Q27System::etW)
-        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[D3Q27System::DIR_M00];
+        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[DIR_M00];
     if ((direction & EsoTwistD3Q27System::etS) == EsoTwistD3Q27System::etS)
-        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[D3Q27System::DIR_0M0];
+        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[DIR_0M0];
     if ((direction & EsoTwistD3Q27System::etN) == EsoTwistD3Q27System::etN)
-        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[D3Q27System::DIR_0P0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[DIR_0P0];
     if ((direction & EsoTwistD3Q27System::etB) == EsoTwistD3Q27System::etB)
-        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[D3Q27System::DIR_00M];
+        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[DIR_00M];
     if ((direction & EsoTwistD3Q27System::etT) == EsoTwistD3Q27System::etT)
-        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[D3Q27System::DIR_00P];
+        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[DIR_00P];
     if ((direction & EsoTwistD3Q27System::etSW) == EsoTwistD3Q27System::etSW)
-        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[D3Q27System::DIR_MM0];
+        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[DIR_MM0];
     if ((direction & EsoTwistD3Q27System::etNE) == EsoTwistD3Q27System::etNE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_PP0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[DIR_PP0];
     if ((direction & EsoTwistD3Q27System::etNW) == EsoTwistD3Q27System::etNW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[D3Q27System::DIR_MP0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[DIR_MP0];
     if ((direction & EsoTwistD3Q27System::etSE) == EsoTwistD3Q27System::etSE)
-        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[D3Q27System::DIR_PM0];
+        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[DIR_PM0];
     if ((direction & EsoTwistD3Q27System::etBW) == EsoTwistD3Q27System::etBW)
-        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[D3Q27System::DIR_M0M];
+        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[DIR_M0M];
     if ((direction & EsoTwistD3Q27System::etTE) == EsoTwistD3Q27System::etTE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_P0P];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[DIR_P0P];
     if ((direction & EsoTwistD3Q27System::etTW) == EsoTwistD3Q27System::etTW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[D3Q27System::DIR_M0P];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[DIR_M0P];
     if ((direction & EsoTwistD3Q27System::etBE) == EsoTwistD3Q27System::etBE)
-        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[D3Q27System::DIR_P0M];
+        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[DIR_P0M];
     if ((direction & EsoTwistD3Q27System::etBS) == EsoTwistD3Q27System::etBS)
-        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[D3Q27System::DIR_0MM];
+        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[DIR_0MM];
     if ((direction & EsoTwistD3Q27System::etTN) == EsoTwistD3Q27System::etTN)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_0PP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[DIR_0PP];
     if ((direction & EsoTwistD3Q27System::etTS) == EsoTwistD3Q27System::etTS)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[D3Q27System::DIR_0MP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[DIR_0MP];
     if ((direction & EsoTwistD3Q27System::etBN) == EsoTwistD3Q27System::etBN)
-        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[D3Q27System::DIR_0PM];
+        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[DIR_0PM];
     if ((direction & EsoTwistD3Q27System::etBSW) == EsoTwistD3Q27System::etBSW)
-        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[D3Q27System::DIR_MMM];
+        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[DIR_MMM];
     if ((direction & EsoTwistD3Q27System::etTNE) == EsoTwistD3Q27System::etTNE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_PPP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[DIR_PPP];
     if ((direction & EsoTwistD3Q27System::etBSE) == EsoTwistD3Q27System::etBSE)
-        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[D3Q27System::DIR_PMM];
+        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[DIR_PMM];
     if ((direction & EsoTwistD3Q27System::etTNW) == EsoTwistD3Q27System::etTNW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_MPP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[DIR_MPP];
     if ((direction & EsoTwistD3Q27System::etBNW) == EsoTwistD3Q27System::etBNW)
-        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[D3Q27System::DIR_MPM];
+        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[DIR_MPM];
     if ((direction & EsoTwistD3Q27System::etTSE) == EsoTwistD3Q27System::etTSE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_PMP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[DIR_PMP];
     if ((direction & EsoTwistD3Q27System::etBNE) == EsoTwistD3Q27System::etBNE)
-        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_PPM];
+        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[DIR_PPM];
     if ((direction & EsoTwistD3Q27System::etTSW) == EsoTwistD3Q27System::etTSW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::DIR_MMP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[DIR_MMP];
     if ((direction & EsoTwistD3Q27System::REST) == EsoTwistD3Q27System::REST)
-        (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+        (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                                 int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_000:
+        case DIR_000:
             (*this->zeroDistributions)(x1, x2, x3) = f;
             break;
         default:
@@ -335,148 +347,152 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(LBMReal f, size_
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2,
+void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(const real *const f, size_t x1, size_t x2,
                                                                    size_t x3, unsigned long int direction)
 {
+    using namespace vf::lbm::dir;
+
     if ((direction & EsoTwistD3Q27System::etE) == EsoTwistD3Q27System::etE)
-        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[D3Q27System::DIR_P00];
+        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[DIR_P00];
     if ((direction & EsoTwistD3Q27System::etW) == EsoTwistD3Q27System::etW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[D3Q27System::DIR_M00];
+        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[DIR_M00];
     if ((direction & EsoTwistD3Q27System::etS) == EsoTwistD3Q27System::etS)
-        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[D3Q27System::DIR_0M0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[DIR_0M0];
     if ((direction & EsoTwistD3Q27System::etN) == EsoTwistD3Q27System::etN)
-        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[D3Q27System::DIR_0P0];
+        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[DIR_0P0];
     if ((direction & EsoTwistD3Q27System::etB) == EsoTwistD3Q27System::etB)
-        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[D3Q27System::DIR_00M];
+        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[DIR_00M];
     if ((direction & EsoTwistD3Q27System::etT) == EsoTwistD3Q27System::etT)
-        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[D3Q27System::DIR_00P];
+        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[DIR_00P];
     if ((direction & EsoTwistD3Q27System::etSW) == EsoTwistD3Q27System::etSW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_MM0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[DIR_MM0];
     if ((direction & EsoTwistD3Q27System::etNE) == EsoTwistD3Q27System::etNE)
-        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[D3Q27System::DIR_PP0];
+        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[DIR_PP0];
     if ((direction & EsoTwistD3Q27System::etNW) == EsoTwistD3Q27System::etNW)
-        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[D3Q27System::DIR_MP0];
+        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[DIR_MP0];
     if ((direction & EsoTwistD3Q27System::etSE) == EsoTwistD3Q27System::etSE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[D3Q27System::DIR_PM0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[DIR_PM0];
     if ((direction & EsoTwistD3Q27System::etBW) == EsoTwistD3Q27System::etBW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_M0M];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[DIR_M0M];
     if ((direction & EsoTwistD3Q27System::etTE) == EsoTwistD3Q27System::etTE)
-        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[D3Q27System::DIR_P0P];
+        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[DIR_P0P];
     if ((direction & EsoTwistD3Q27System::etTW) == EsoTwistD3Q27System::etTW)
-        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[D3Q27System::DIR_M0P];
+        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[DIR_M0P];
     if ((direction & EsoTwistD3Q27System::etBE) == EsoTwistD3Q27System::etBE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[D3Q27System::DIR_P0M];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[DIR_P0M];
     if ((direction & EsoTwistD3Q27System::etBS) == EsoTwistD3Q27System::etBS)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_0MM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[DIR_0MM];
     if ((direction & EsoTwistD3Q27System::etTN) == EsoTwistD3Q27System::etTN)
-        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[D3Q27System::DIR_0PP];
+        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[DIR_0PP];
     if ((direction & EsoTwistD3Q27System::etTS) == EsoTwistD3Q27System::etTS)
-        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[D3Q27System::DIR_0MP];
+        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[DIR_0MP];
     if ((direction & EsoTwistD3Q27System::etBN) == EsoTwistD3Q27System::etBN)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[D3Q27System::DIR_0PM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[DIR_0PM];
     if ((direction & EsoTwistD3Q27System::etBSW) == EsoTwistD3Q27System::etBSW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_MMM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[DIR_MMM];
     if ((direction & EsoTwistD3Q27System::etTNE) == EsoTwistD3Q27System::etTNE)
-        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[D3Q27System::DIR_PPP];
+        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[DIR_PPP];
     if ((direction & EsoTwistD3Q27System::etBSE) == EsoTwistD3Q27System::etBSE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_PMM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[DIR_PMM];
     if ((direction & EsoTwistD3Q27System::etTNW) == EsoTwistD3Q27System::etTNW)
-        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[D3Q27System::DIR_MPP];
+        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[DIR_MPP];
     if ((direction & EsoTwistD3Q27System::etBNW) == EsoTwistD3Q27System::etBNW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_MPM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[DIR_MPM];
     if ((direction & EsoTwistD3Q27System::etTSE) == EsoTwistD3Q27System::etTSE)
-        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[D3Q27System::DIR_PMP];
+        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[DIR_PMP];
     if ((direction & EsoTwistD3Q27System::etBNE) == EsoTwistD3Q27System::etBNE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::DIR_PPM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[DIR_PPM];
     if ((direction & EsoTwistD3Q27System::etTSW) == EsoTwistD3Q27System::etTSW)
-        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_MMP];
+        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[DIR_MMP];
     if ((direction & EsoTwistD3Q27System::REST) == EsoTwistD3Q27System::REST)
-        (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+        (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                                    unsigned long int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_000:
+        case DIR_000:
             (*this->zeroDistributions)(x1, x2, x3) = f;
             break;
         default:
@@ -484,124 +500,128 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(LBMReal f, si
     }
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal D3Q27EsoTwist3DSplittedVector::getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)
+real D3Q27EsoTwist3DSplittedVector::getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             return (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             return (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             return (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             return (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             return (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             return (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             return (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             return (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             return (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             return (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             return (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             return (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             return (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             return (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             return (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_000:
+        case DIR_000:
             return (*this->zeroDistributions)(x1, x2, x3);
         default:
             UB_THROW(UbException(UB_EXARGS, "Direction didn't find"));
     }
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal D3Q27EsoTwist3DSplittedVector::getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)
+real D3Q27EsoTwist3DSplittedVector::getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             return (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             return (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             return (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             return (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             return (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             return (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             return (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             return (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             return (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             return (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             return (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             return (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             return (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             return (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             return (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_000:
+        case DIR_000:
             return (*this->zeroDistributions)(x1, x2, x3);
         default:
             UB_THROW(UbException(UB_EXARGS, "Direction didn't find"));
@@ -614,17 +634,17 @@ size_t D3Q27EsoTwist3DSplittedVector::getNX2() const { return NX2; }
 //////////////////////////////////////////////////////////////////////////
 size_t D3Q27EsoTwist3DSplittedVector::getNX3() const { return NX3; }
 //////////////////////////////////////////////////////////////////////////
-CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getLocalDistributions()
+CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getLocalDistributions()
 {
     return this->localDistributions;
 }
 //////////////////////////////////////////////////////////////////////////
-CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getNonLocalDistributions()
+CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getNonLocalDistributions()
 {
     return this->nonLocalDistributions;
 }
 //////////////////////////////////////////////////////////////////////////
-CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr D3Q27EsoTwist3DSplittedVector::getZeroDistributions()
+CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr D3Q27EsoTwist3DSplittedVector::getZeroDistributions()
 {
     return this->zeroDistributions;
 }
@@ -635,17 +655,17 @@ void D3Q27EsoTwist3DSplittedVector::setNX2(size_t newNX2) { NX2 = newNX2; }
 //////////////////////////////////////////////////////////////////////////
 void D3Q27EsoTwist3DSplittedVector::setNX3(size_t newNX3) { NX3 = newNX3; }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array)
+void D3Q27EsoTwist3DSplittedVector::setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array)
 {
     localDistributions = array;
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array)
+void D3Q27EsoTwist3DSplittedVector::setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array)
 {
     nonLocalDistributions = array;
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr array)
+void D3Q27EsoTwist3DSplittedVector::setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr array)
 {
     zeroDistributions = array;
 }
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
index 1c0d7d05f1392c8c116863e9e0b41000c90ed15e..060e2cb3ad367d31d6b30577f370cd1b692daecd 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
@@ -49,34 +49,34 @@ public:
     //! \param nx2 number of nodes in x2 direction
     //! \param nx3 number of nodes in x3 direction
     //! \param value initialisation value
-    D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, LBMReal value);
+    D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, real value);
     //////////////////////////////////////////////////////////////////////////
     ~D3Q27EsoTwist3DSplittedVector() override;
     //////////////////////////////////////////////////////////////////////////
     void swap() override;
     //////////////////////////////////////////////////////////////////////////
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override;
     ////////////////////////////////////////////////////////////////////////
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override;
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
     size_t getNX1() const override;
     //////////////////////////////////////////////////////////////////////////
@@ -84,23 +84,23 @@ public:
     //////////////////////////////////////////////////////////////////////////
     size_t getNX3() const override;
     //////////////////////////////////////////////////////////////////////////
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr getLocalDistributions();
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr getLocalDistributions();
     //////////////////////////////////////////////////////////////////////////
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr getNonLocalDistributions();
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr getNonLocalDistributions();
     //////////////////////////////////////////////////////////////////////////
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr getZeroDistributions();
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr getZeroDistributions();
     //////////////////////////////////////////////////////////////////////////
     void setNX1(size_t newNX1);
     void setNX2(size_t newNX2);
     void setNX3(size_t newNX3);
-    void setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array);
-    void setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array);
-    void setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr array);
+    void setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array);
+    void setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array);
+    void setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr array);
 
 protected:
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
     size_t NX1, NX2, NX3;
 
     friend class MPIIORestartCoProcessor;
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp
index d67341e1af96b3914df478994f3097b64bf78302..07b0abb6aafd34510eedb2df7829d39239ecb13f 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp
@@ -1,16 +1,16 @@
 #include "D3Q27EsoTwist3DSplittedVectorEx.h"
 
-D3Q27EsoTwist3DSplittedVectorEx::D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, LBMReal value)
+D3Q27EsoTwist3DSplittedVectorEx::D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, real value)
 {
     this->NX1 = nx1;
     this->NX2 = nx2;
     this->NX3 = nx3;
 
-    this->localDistributions = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-        new CbArray4D<LBMReal, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
-    this->nonLocalDistributions = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-        new CbArray4D<LBMReal, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
+    this->localDistributions = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+        new CbArray4D<real, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
+    this->nonLocalDistributions = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+        new CbArray4D<real, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
 
     this->zeroDistributions =
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx1, nx2, nx3, value));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx1, nx2, nx3, value));
 }
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h
index 54f9d55e5c4df0891cf40cc058a4ceaae934626a..e5481f4c86c80c4c9d3f6d64c404b8c279268f9b 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h
@@ -6,7 +6,7 @@
 class D3Q27EsoTwist3DSplittedVectorEx : public D3Q27EsoTwist3DSplittedVector
 {
 public:
-    D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, LBMReal value);
+    D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, real value);
 
 protected:
 private:
diff --git a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
index 12b9e57489e39c15e912b5b679b768c16b89a83b..65758b1051cfd70d8495cc8d0d4af811409887e7 100644
--- a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
@@ -40,11 +40,11 @@
 #include "basics/container/CbArray3D.h"
 #include "basics/container/CbArray4D.h"
 
-using AverageValuesArray3D     = CbArray4D<LBMReal, IndexerX4X3X2X1>;
-using ShearStressValuesArray3D = CbArray4D<LBMReal, IndexerX4X3X2X1>;
-using RelaxationFactorArray3D  = CbArray3D<LBMReal, IndexerX3X2X1>;
-using PhaseFieldArray3D        = CbArray3D<LBMReal, IndexerX3X2X1>;
-using PressureFieldArray3D     = CbArray3D<LBMReal, IndexerX3X2X1>;
+using AverageValuesArray3D     = CbArray4D<real, IndexerX4X3X2X1>;
+using ShearStressValuesArray3D = CbArray4D<real, IndexerX4X3X2X1>;
+using RelaxationFactorArray3D  = CbArray3D<real, IndexerX3X2X1>;
+using PhaseFieldArray3D        = CbArray3D<real, IndexerX3X2X1>;
+using PressureFieldArray3D     = CbArray3D<real, IndexerX3X2X1>;
 
 //! A class provides an interface for data structures in the kernel.
 class DataSet3D
diff --git a/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h b/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h
index 8fe4dccea1b53da0513a093e8a741cd0071caf48..fff57191d5172e2f3c085b6f8753018c58fae42a 100644
--- a/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h
@@ -55,39 +55,39 @@ public:
     //! \param x1 coordinate x1
     //! \param x2 coordinate x2
     //! \param x3 coordinate x3
-    virtual void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! set distribution
     //! \param f distribution
     //! \param x1 coordinate x1
     //! \param x2 coordinate x2
     //! \param x3 coordinate x3
-    virtual void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! get distribution in inverse order
     //! \param f distribution
     //! \param x1 coordinate x1
     //! \param x2 coordinate x2
     //! \param x3 coordinate x3
-    virtual void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! set distribution in inverse order
     //! \param f distribution
     //! \param x1 coordinate x1
     //! \param x1 coordinate x2
     //! \param x1 coordinate x3
-    virtual void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! set distribution in inverse order
     //! \param f distribution
     //! \param x1 coordinate x1
     //! \param x1 coordinate x2
     //! \param x1 coordinate x3
-    virtual void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    virtual void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                              unsigned long int direction)                               = 0;
-    virtual void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) = 0;
-    virtual LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)      = 0;
-    virtual void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    virtual void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) = 0;
+    virtual real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)      = 0;
+    virtual void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                 unsigned long int direction)                            = 0;
-    virtual void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    virtual void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                 unsigned long int direction)                            = 0;
-    virtual LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)         = 0;
+    virtual real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)         = 0;
     virtual void swap()                                                                                 = 0;
 
 protected:
diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h b/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h
index 319a9200cc204b0f9b869b2e52353e717a89d783..6a65255adfaf48d415c76b66364f3f34966572c0 100644
--- a/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h
@@ -63,31 +63,31 @@ public:
     //////////////////////////////////////////////////////////////////////////
     void swap() override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     ////////////////////////////////////////////////////////////////////////
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override = 0;
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    // virtual void getDistributionInvForDirection(LBMReal* const& f, const size_t& x1, const size_t& x2, const size_t&
+    // virtual void getDistributionInvForDirection(real* const& f, const size_t& x1, const size_t& x2, const size_t&
     // x3, const unsigned long int& direction) = 0;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
+    real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
+    real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
     size_t getNX1() const override = 0;
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp
index b9a2e069edea6fa3e514b64e4e4174f0964a3a20..c28f1d4896619d72f27d7cd99579295fa46c68a5 100644
--- a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp
@@ -44,12 +44,13 @@ const int EsoTwistD3Q27System::ETX3[EsoTwistD3Q27System::ENDF + 1] = { 0, 0, 0,
                                                                        0, 1, 1, 0, 0, -1, 0, -1, 0, 1, 0, 1, 0 };
 
 const int EsoTwistD3Q27System::etINVDIR[EsoTwistD3Q27System::ENDF + 1] = {
-    D3Q27System::INV_P00,   D3Q27System::INV_M00,   D3Q27System::INV_0P0,   D3Q27System::INV_0M0,   D3Q27System::INV_00P,
-    D3Q27System::INV_00M,   D3Q27System::INV_PP0,  D3Q27System::INV_MM0,  D3Q27System::INV_PM0,  D3Q27System::INV_MP0,
-    D3Q27System::INV_P0P,  D3Q27System::INV_M0M,  D3Q27System::INV_P0M,  D3Q27System::INV_M0P,  D3Q27System::INV_0PP,
-    D3Q27System::INV_0MM,  D3Q27System::INV_0PM,  D3Q27System::INV_0MP,  D3Q27System::INV_PPP, D3Q27System::INV_MPP,
-    D3Q27System::INV_PMP, D3Q27System::INV_MMP, D3Q27System::INV_PPM, D3Q27System::INV_MPM, D3Q27System::INV_PMM,
-    D3Q27System::INV_MMM, D3Q27System::DIR_000
+
+    vf::lbm::dir::INV_P00,   vf::lbm::dir::INV_M00,   vf::lbm::dir::INV_0P0,   vf::lbm::dir::INV_0M0,   vf::lbm::dir::INV_00P,
+    vf::lbm::dir::INV_00M,   vf::lbm::dir::INV_PP0,  vf::lbm::dir::INV_MM0,  vf::lbm::dir::INV_PM0,  vf::lbm::dir::INV_MP0,
+    vf::lbm::dir::INV_P0P,  vf::lbm::dir::INV_M0M,  vf::lbm::dir::INV_P0M,  vf::lbm::dir::INV_M0P,  vf::lbm::dir::INV_0PP,
+    vf::lbm::dir::INV_0MM,  vf::lbm::dir::INV_0PM,  vf::lbm::dir::INV_0MP,  vf::lbm::dir::INV_PPP, vf::lbm::dir::INV_MPP,
+    vf::lbm::dir::INV_PMP, vf::lbm::dir::INV_MMP, vf::lbm::dir::INV_PPM, vf::lbm::dir::INV_MPM, vf::lbm::dir::INV_PMM,
+    vf::lbm::dir::INV_MMM, vf::lbm::dir::DIR_000
 };
 
 const unsigned long int EsoTwistD3Q27System::etDIR[EsoTwistD3Q27System::ENDF + 1] = {
diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h
index 7ccd413c662206ab3a12b3a1c88fcc81450f7a75..74cd5b9b8fe0aeb58fad65c34c5231abb8eb4b8c 100644
--- a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h
+++ b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h
@@ -44,63 +44,63 @@ struct EsoTwistD3Q27System {
     const static int STARTF = D3Q27System::STARTF;
     const static int ENDF   = D3Q27System::ENDF;
 
-    const static int STARTDIR = D3Q27System::STARTDIR;
+ //   const static int STARTDIR = D3Q27System::STARTDIR;
     const static int ENDDIR   = D3Q27System::ENDDIR;
 
-    static const int REST = D3Q27System::DIR_000; /*f0 */
-    static const int E    = D3Q27System::DIR_P00;    /*f1 */
-    static const int W    = D3Q27System::DIR_M00;    /*f2 */
-    static const int N    = D3Q27System::DIR_0P0;    /*f3 */
-    static const int S    = D3Q27System::DIR_0M0;    /*f4 */
-    static const int T    = D3Q27System::DIR_00P;    /*f5 */
-    static const int B    = D3Q27System::DIR_00M;    /*f6 */
-    static const int NE   = D3Q27System::DIR_PP0;   /*f7 */
-    static const int SW   = D3Q27System::DIR_MM0;   /*f8 */
-    static const int SE   = D3Q27System::DIR_PM0;   /*f9 */
-    static const int NW   = D3Q27System::DIR_MP0;   /*f10*/
-    static const int TE   = D3Q27System::DIR_P0P;   /*f11*/
-    static const int BW   = D3Q27System::DIR_M0M;   /*f12*/
-    static const int BE   = D3Q27System::DIR_P0M;   /*f13*/
-    static const int TW   = D3Q27System::DIR_M0P;   /*f14*/
-    static const int TN   = D3Q27System::DIR_0PP;   /*f15*/
-    static const int BS   = D3Q27System::DIR_0MM;   /*f16*/
-    static const int BN   = D3Q27System::DIR_0PM;   /*f17*/
-    static const int TS   = D3Q27System::DIR_0MP;   /*f18*/
-    static const int TNE  = D3Q27System::DIR_PPP;
-    static const int TNW  = D3Q27System::DIR_MPP;
-    static const int TSE  = D3Q27System::DIR_PMP;
-    static const int TSW  = D3Q27System::DIR_MMP;
-    static const int BNE  = D3Q27System::DIR_PPM;
-    static const int BNW  = D3Q27System::DIR_MPM;
-    static const int BSE  = D3Q27System::DIR_PMM;
-    static const int BSW  = D3Q27System::DIR_MMM;
+    static const int REST = vf::lbm::dir::DIR_000; /*f0 */
+    static const int E    = vf::lbm::dir::DIR_P00;    /*f1 */
+    static const int W    = vf::lbm::dir::DIR_M00;    /*f2 */
+    static const int N    = vf::lbm::dir::DIR_0P0;    /*f3 */
+    static const int S    = vf::lbm::dir::DIR_0M0;    /*f4 */
+    static const int T    = vf::lbm::dir::DIR_00P;    /*f5 */
+    static const int B    = vf::lbm::dir::DIR_00M;    /*f6 */
+    static const int NE   = vf::lbm::dir::DIR_PP0;   /*f7 */
+    static const int SW   = vf::lbm::dir::DIR_MM0;   /*f8 */
+    static const int SE   = vf::lbm::dir::DIR_PM0;   /*f9 */
+    static const int NW   = vf::lbm::dir::DIR_MP0;   /*f10*/
+    static const int TE   = vf::lbm::dir::DIR_P0P;   /*f11*/
+    static const int BW   = vf::lbm::dir::DIR_M0M;   /*f12*/
+    static const int BE   = vf::lbm::dir::DIR_P0M;   /*f13*/
+    static const int TW   = vf::lbm::dir::DIR_M0P;   /*f14*/
+    static const int TN   = vf::lbm::dir::DIR_0PP;   /*f15*/
+    static const int BS   = vf::lbm::dir::DIR_0MM;   /*f16*/
+    static const int BN   = vf::lbm::dir::DIR_0PM;   /*f17*/
+    static const int TS   = vf::lbm::dir::DIR_0MP;   /*f18*/
+    static const int TNE  = vf::lbm::dir::DIR_PPP;
+    static const int TNW  = vf::lbm::dir::DIR_MPP;
+    static const int TSE  = vf::lbm::dir::DIR_PMP;
+    static const int TSW  = vf::lbm::dir::DIR_MMP;
+    static const int BNE  = vf::lbm::dir::DIR_PPM;
+    static const int BNW  = vf::lbm::dir::DIR_MPM;
+    static const int BSE  = vf::lbm::dir::DIR_PMM;
+    static const int BSW  = vf::lbm::dir::DIR_MMM;
 
-    static const int INV_E   = D3Q27System::DIR_M00;
-    static const int INV_W   = D3Q27System::DIR_P00;
-    static const int INV_N   = D3Q27System::DIR_0M0;
-    static const int INV_S   = D3Q27System::DIR_0P0;
-    static const int INV_T   = D3Q27System::DIR_00M;
-    static const int INV_B   = D3Q27System::DIR_00P;
-    static const int INV_NE  = D3Q27System::DIR_MM0;
-    static const int INV_SW  = D3Q27System::DIR_PP0;
-    static const int INV_SE  = D3Q27System::DIR_MP0;
-    static const int INV_NW  = D3Q27System::DIR_PM0;
-    static const int INV_TE  = D3Q27System::DIR_M0M;
-    static const int INV_BW  = D3Q27System::DIR_P0P;
-    static const int INV_BE  = D3Q27System::DIR_M0P;
-    static const int INV_TW  = D3Q27System::DIR_P0M;
-    static const int INV_TN  = D3Q27System::DIR_0MM;
-    static const int INV_BS  = D3Q27System::DIR_0PP;
-    static const int INV_BN  = D3Q27System::DIR_0MP;
-    static const int INV_TS  = D3Q27System::DIR_0PM;
-    static const int INV_TNE = D3Q27System::DIR_MMM;
-    static const int INV_TNW = D3Q27System::DIR_PMM;
-    static const int INV_TSE = D3Q27System::DIR_MPM;
-    static const int INV_TSW = D3Q27System::DIR_PPM;
-    static const int INV_BNE = D3Q27System::DIR_MMP;
-    static const int INV_BNW = D3Q27System::DIR_PMP;
-    static const int INV_BSE = D3Q27System::DIR_MPP;
-    static const int INV_BSW = D3Q27System::DIR_PPP;
+    static const int INV_E   = vf::lbm::dir::DIR_M00;
+    static const int INV_W   = vf::lbm::dir::DIR_P00;
+    static const int INV_N   = vf::lbm::dir::DIR_0M0;
+    static const int INV_S   = vf::lbm::dir::DIR_0P0;
+    static const int INV_T   = vf::lbm::dir::DIR_00M;
+    static const int INV_B   = vf::lbm::dir::DIR_00P;
+    static const int INV_NE  = vf::lbm::dir::DIR_MM0;
+    static const int INV_SW  = vf::lbm::dir::DIR_PP0;
+    static const int INV_SE  = vf::lbm::dir::DIR_MP0;
+    static const int INV_NW  = vf::lbm::dir::DIR_PM0;
+    static const int INV_TE  = vf::lbm::dir::DIR_M0M;
+    static const int INV_BW  = vf::lbm::dir::DIR_P0P;
+    static const int INV_BE  = vf::lbm::dir::DIR_M0P;
+    static const int INV_TW  = vf::lbm::dir::DIR_P0M;
+    static const int INV_TN  = vf::lbm::dir::DIR_0MM;
+    static const int INV_BS  = vf::lbm::dir::DIR_0PP;
+    static const int INV_BN  = vf::lbm::dir::DIR_0MP;
+    static const int INV_TS  = vf::lbm::dir::DIR_0PM;
+    static const int INV_TNE = vf::lbm::dir::DIR_MMM;
+    static const int INV_TNW = vf::lbm::dir::DIR_PMM;
+    static const int INV_TSE = vf::lbm::dir::DIR_MPM;
+    static const int INV_TSW = vf::lbm::dir::DIR_PPM;
+    static const int INV_BNE = vf::lbm::dir::DIR_MMP;
+    static const int INV_BNW = vf::lbm::dir::DIR_PMP;
+    static const int INV_BSE = vf::lbm::dir::DIR_MPP;
+    static const int INV_BSW = vf::lbm::dir::DIR_PPP;
 
     static const unsigned long int etZERO; // 1;/*f0 */
     static const unsigned long int etE;    //  2;    /*f1 */
diff --git a/src/cpu/VirtualFluidsCore/Data/VoidData3D.h b/src/cpu/VirtualFluidsCore/Data/VoidData3D.h
index 12afae57d0f88d01963b4694aca881dd18691f87..25fe5dde2a5a874fdefe0eaf2502c86df29faa95 100644
--- a/src/cpu/VirtualFluidsCore/Data/VoidData3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/VoidData3D.h
@@ -8,7 +8,7 @@ class VoidData3D : public EsoTwist3D
 public:
     VoidData3D() = default;
     
-    VoidData3D(size_t nx1, size_t nx2, size_t nx3, LBMReal /*value*/)
+    VoidData3D(size_t nx1, size_t nx2, size_t nx3, real /*value*/)
     {
         this->NX1 = nx1;
         this->NX2 = nx2;
@@ -19,28 +19,28 @@ public:
     size_t getNX1() const override { return NX1; }
     size_t getNX2() const override { return NX2; }
     size_t getNX3() const override { return NX3; }
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override
     {
     }
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override {}
-    LBMReal getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override {}
+    real getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
     {
         return 0.0;
     }
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override
     {
     }
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override
     {
     }
-    LBMReal getDistributionForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
+    real getDistributionForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
     {
         return 0.0;
     }
diff --git a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp
index 85a01cd1f85cf3efc556dd176cb748fc947e972b..92559f5f37d0b39a086b6bab0e63157b79c9f718 100644
--- a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp
@@ -67,7 +67,7 @@ void BasicCalculator::calculate()
 
 #ifdef TIMING
         UbTimer timer;
-        double time[6];
+        real time[6];
 #endif
 
         for (calcStep = startTimeStep; calcStep <= numberOfTimeSteps; calcStep++) {
@@ -147,7 +147,7 @@ void BasicCalculator::calculate()
             if (additionalGhostLayerUpdateScheduler->isDue(calcStep)) {
                 exchangeBlockData(straightStartLevel, maxInitLevel);
             }
-            coProcess((double)(calcStep));
+            coProcess((real)(calcStep));
             // now ghost nodes have actual values
         }
         UBLOG(logDEBUG1, "OMPCalculator::calculate() - stoped");
diff --git a/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp b/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp
index c6a75187047f1b507389642baaa8471abecd1683..a6b7127bc4ecd2049790cb2bef98c1d506f7f513 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp
@@ -257,6 +257,8 @@ int Block3D::getNumberOfRemoteConnectors()
 //////////////////////////////////////////////////////////////////////////
 int Block3D::getNumberOfLocalConnectorsForSurfaces()
 {
+    using namespace vf::lbm::dir;
+
     int count = 0;
 
     if (connectors.size() < 6)
@@ -264,7 +266,7 @@ int Block3D::getNumberOfLocalConnectorsForSurfaces()
 
     for (SPtr<Block3DConnector> c : connectors) {
         if (c) {
-            if (c->getSendDir() >= D3Q27System::DIR_P00 && c->getSendDir() <= D3Q27System ::DIR_00M && c->isLocalConnector())
+            if (c->getSendDir() >= (int)DIR_P00 && c->getSendDir() <= (int)DIR_00M && c->isLocalConnector())
                 count++;
         }
     }
@@ -274,11 +276,13 @@ int Block3D::getNumberOfLocalConnectorsForSurfaces()
 //////////////////////////////////////////////////////////////////////////
 int Block3D::getNumberOfRemoteConnectorsForSurfaces()
 {
+    using namespace vf::lbm::dir;
+
     int count = 0;
 
     for (SPtr<Block3DConnector> c : connectors) {
         if (c) {
-            if (c->getSendDir() >= D3Q27System::DIR_P00 && c->getSendDir() <= D3Q27System ::DIR_00M && c->isRemoteConnector())
+            if (c->getSendDir() >= (int)DIR_P00 && c->getSendDir() <= (int)DIR_00M && c->isRemoteConnector())
                 count++;
         }
     }
@@ -317,10 +321,10 @@ void Block3D::deleteInterpolationFlag()
     interpolationFlagCF = 0;
 }
 //////////////////////////////////////////////////////////////////////////
-double Block3D::getWorkLoad()
+real Block3D::getWorkLoad()
 {
-    double l = kernel->getCalculationTime();
-    l *= static_cast<double>(1 << level);
-    return l;
+    real workLoad = kernel->getCalculationTime(); // calculation time reported by the kernel
+    workLoad *= static_cast<real>(1 << level);    // weighted by 2^level
+    return workLoad;
 }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/Grid/Block3D.h b/src/cpu/VirtualFluidsCore/Grid/Block3D.h
index b2279b069e6ee322023d30419f8eed5c587f95e8..686d8df0f9a4dda1b8d744b11b18a7bddf7e3f77 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Block3D.h
+++ b/src/cpu/VirtualFluidsCore/Grid/Block3D.h
@@ -38,6 +38,7 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "lbm/constants/D3Q27.h"
 
 class Block3DConnector;
 class LBMKernel;
@@ -134,7 +135,7 @@ public:
     bool hasInterpolationFlagFC(int dir);
     bool hasInterpolationFlagFC();
 
-    double getWorkLoad();
+    real getWorkLoad();
 
     std::string toString();
 
diff --git a/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp b/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp
index fbeb2de979bb31dfb87441b5cfcfdf3393f0043c..85235867a3352c8277e6e279b7dbb6058d03020a 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp
@@ -77,7 +77,7 @@ Calculator::~Calculator() = default;
 //////////////////////////////////////////////////////////////////////////
 void Calculator::addCoProcessor(SPtr<CoProcessor> coProcessor) { coProcessors.push_back(coProcessor); }
 //////////////////////////////////////////////////////////////////////////
-void Calculator::coProcess(double step)
+void Calculator::coProcess(real step)
 {
     for (SPtr<CoProcessor> cp : coProcessors) {
         cp->process(step);
diff --git a/src/cpu/VirtualFluidsCore/Grid/Calculator.h b/src/cpu/VirtualFluidsCore/Grid/Calculator.h
index 377e6eee7d765ba6888b2aec6045cb14eac484dc..759541dd5c1a85cac727c9a714407d1fa10a1cb7 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Calculator.h
+++ b/src/cpu/VirtualFluidsCore/Grid/Calculator.h
@@ -36,6 +36,7 @@
 
 #include <PointerDefinitions.h>
 #include <vector>
+#include "lbm/constants/D3Q27.h"
 
 class Grid3D;
 class UbScheduler;
@@ -53,7 +54,7 @@ public:
     virtual ~Calculator();
     //! control of coProcessors
     void addCoProcessor(SPtr<CoProcessor> coProcessor);
-    void coProcess(double step);
+    void coProcess(real step);
 
     virtual void calculate() = 0;
 
diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
index 8c9ccab62fa18f96abeca389ab437e62cdc80d7b..a214b4bd0137b2bf319925b519f1dcb77fabded4 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
@@ -45,6 +45,7 @@
 #include "D3Q27System.h"
 #include <Block3D.h>
 #include <Communicator.h>
+#include "UbMath.h"
 
 using namespace std;
 
@@ -66,8 +67,8 @@ Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int bl
     levelSet.resize(D3Q27System::MAXLEVEL + 1);
     bundle = comm->getBundleID();
     rank  = comm->getProcessID();
-    trafo = std::make_shared<CoordinateTransformation3D>(0.0, 0.0, 0.0, (double)blockNx1, (double)blockNx2,
-                                                         (double)blockNx3);
+    trafo = std::make_shared<CoordinateTransformation3D>(0.0, 0.0, 0.0, (real)blockNx1, (real)blockNx2,
+                                                         (real)blockNx3);
     UbTupleInt3 minInd(0, 0, 0);
     UbTupleInt3 maxInd(gridNx1, gridNx2, gridNx3);
     this->fillExtentWithBlocks(minInd, maxInd);
@@ -75,7 +76,7 @@ Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int bl
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::addInteractor(SPtr<Interactor3D> interactor) { interactors.push_back(interactor); }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::addAndInitInteractor(SPtr<Interactor3D> interactor, double timestep)
+void Grid3D::addAndInitInteractor(SPtr<Interactor3D> interactor, real timestep)
 {
     interactors.push_back(interactor);
     interactor->initInteractor(timestep);
@@ -432,7 +433,7 @@ void Grid3D::setPeriodicX2(bool value) { this->periodicX2 = value; }
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::setPeriodicX3(bool value) { this->periodicX3 = value; }
 //////////////////////////////////////////////////////////////////////////
-UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord) const
+UbTupleInt3 Grid3D::getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord) const
 {
     if (!trafo) {
         return makeUbTuple((int)blockX1Coord, (int)blockX2Coord, (int)blockX3Coord);
@@ -443,14 +444,14 @@ UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, do
                        (int)trafo->transformForwardToX3Coordinate(blockX1Coord, blockX2Coord, blockX3Coord));
 }
 //////////////////////////////////////////////////////////////////////////
-UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord, int level) const
+UbTupleInt3 Grid3D::getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord, int level) const
 {
     if (!trafo) {
         return makeUbTuple((int)blockX1Coord, (int)blockX2Coord, (int)blockX3Coord);
     }
 
-    double dx = getDeltaX(level);
-    double blockLentghX1, blockLentghX2, blockLentghX3;
+    real dx = getDeltaX(level);
+    real blockLentghX1, blockLentghX2, blockLentghX3;
     blockLentghX1      = blockNx1 * dx;
     blockLentghX2      = blockNx2 * dx;
     blockLentghX3      = blockNx3 * dx;
@@ -471,10 +472,10 @@ UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, do
 UbTupleDouble3 Grid3D::getBlockLengths(const SPtr<Block3D> block) const
 {
     int level    = block->getLevel();
-    double delta = 1.0 / (double)(1 << level);
+    real delta = 1.0 / (real)(1 << level); // 1 / 2^level
 
     if (!trafo)
-        makeUbTuple<double, double, double>(delta, delta, delta);
+        return makeUbTuple<real, real, real>(delta, delta, delta); // fix: actually return; falling through would dereference the null trafo
 
     return makeUbTuple(trafo->getX1CoordinateScaling() * delta, trafo->getX2CoordinateScaling() * delta,
                        trafo->getX3CoordinateScaling() * delta);
@@ -486,21 +487,21 @@ void Grid3D::setCoordinateTransformator(SPtr<CoordinateTransformation3D> trafo)
 //////////////////////////////////////////////////////////////////////////
 const SPtr<CoordinateTransformation3D> Grid3D::getCoordinateTransformator() const { return this->trafo; }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::setDeltaX(double dx) { this->orgDeltaX = dx; }
+void Grid3D::setDeltaX(real dx) { orgDeltaX = dx; } // stores dx as orgDeltaX
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::setDeltaX(double worldUnit, double gridUnit) { this->orgDeltaX = worldUnit / gridUnit; }
+void Grid3D::setDeltaX(real worldUnit, real gridUnit) { orgDeltaX = worldUnit / gridUnit; } // orgDeltaX = worldUnit / gridUnit
 //////////////////////////////////////////////////////////////////////////
-double Grid3D::getDeltaX(int level) const
+real Grid3D::getDeltaX(int level) const
 {
-    double delta = this->orgDeltaX / (double)(1 << level);
-    return delta;
+    const real levelFactor = static_cast<real>(1 << level); // 2^level
+    return this->orgDeltaX / levelFactor;                   // orgDeltaX divided by 2^level
 }
 //////////////////////////////////////////////////////////////////////////
-double Grid3D::getDeltaX(SPtr<Block3D> block) const { return getDeltaX(block->getLevel()); }
+real Grid3D::getDeltaX(SPtr<Block3D> block) const { return this->getDeltaX(block->getLevel()); } // delegates to the level overload
 //////////////////////////////////////////////////////////////////////////
 UbTupleDouble3 Grid3D::getNodeOffset(SPtr<Block3D> block) const
 {
-    double delta = this->getDeltaX(block);
-    return makeUbTuple(offset * delta, offset * delta, offset * delta);
+    const auto shift = offset * getDeltaX(block); // offset scaled by the block's deltaX
+    return makeUbTuple(shift, shift, shift);      // identical value for all three components
 }
 ////////////////////////////////////////////////////////////////////////////
@@ -508,26 +509,26 @@ Vector3D Grid3D::getNodeCoordinates(SPtr<Block3D> block, int ix1, int ix2, int i
 {
     UbTupleDouble3 org        = this->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset = this->getNodeOffset(block);
-    double deltaX             = getDeltaX(block);
+    real deltaX             = getDeltaX(block);
 
-    double x1 = val<1>(org) - val<1>(nodeOffset) + (double)ix1 * deltaX;
-    double x2 = val<2>(org) - val<2>(nodeOffset) + (double)ix2 * deltaX;
-    double x3 = val<3>(org) - val<3>(nodeOffset) + (double)ix3 * deltaX;
+    real x1 = val<1>(org) - val<1>(nodeOffset) + (real)ix1 * deltaX;
+    real x2 = val<2>(org) - val<2>(nodeOffset) + (real)ix2 * deltaX;
+    real x3 = val<3>(org) - val<3>(nodeOffset) + (real)ix3 * deltaX;
 
     return Vector3D(x1, x2, x3);
 }
 ////////////////////////////////////////////////////////////////////////////
-UbTupleInt3 Grid3D::getNodeIndexes(SPtr<Block3D> block, double nodeX1Coord, double nodeX2Coord,
-                                   double nodeX3Coord) const
+UbTupleInt3 Grid3D::getNodeIndexes(SPtr<Block3D> block, real nodeX1Coord, real nodeX2Coord,
+                                   real nodeX3Coord) const
 {
     UbTupleDouble3 org        = this->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset = this->getNodeOffset(block);
-    double deltaX             = getDeltaX(block);
+    real deltaX             = getDeltaX(block);
 
     int ix1, ix2, ix3;
-    double ixx1 = (abs(nodeX1Coord - val<1>(org) + val<1>(nodeOffset)) / deltaX);
-    double ixx2 = (abs(nodeX2Coord - val<2>(org) + val<2>(nodeOffset)) / deltaX);
-    double ixx3 = (abs(nodeX3Coord - val<3>(org) + val<3>(nodeOffset)) / deltaX);
+    real ixx1 = (abs(nodeX1Coord - val<1>(org) + val<1>(nodeOffset)) / deltaX);
+    real ixx2 = (abs(nodeX2Coord - val<2>(org) + val<2>(nodeOffset)) / deltaX);
+    real ixx3 = (abs(nodeX3Coord - val<3>(org) + val<3>(nodeOffset)) / deltaX);
     if (ixx1 - (int)ixx1 > .9999999999)
         ix1 = (int)ixx1 + 1;
     else
@@ -560,10 +561,10 @@ UbTupleDouble3 Grid3D::getBlockWorldCoordinates(SPtr<Block3D> block) const
 //////////////////////////////////////////////////////////////////////////
 UbTupleDouble3 Grid3D::getBlockWorldCoordinates(int blockX1Index, int blockX2Index, int blockX3Index, int level) const
 {
-    double c1oShiftedLevel = 1.0 / (double)(1 << level);
-    double x1              = (double)blockX1Index * c1oShiftedLevel;
-    double x2              = (double)blockX2Index * c1oShiftedLevel;
-    double x3              = (double)blockX3Index * c1oShiftedLevel;
+    real c1oShiftedLevel = 1.0 / (real)(1 << level);
+    real x1              = (real)blockX1Index * c1oShiftedLevel;
+    real x2              = (real)blockX2Index * c1oShiftedLevel;
+    real x3              = (real)blockX3Index * c1oShiftedLevel;
 
     if (!trafo)
         return { x1, x2, x3 };
@@ -631,8 +632,9 @@ SPtr<Block3D> Grid3D::getNeighborBlock(int dir, SPtr<Block3D> block) const
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::getAllNeighbors(int ix1, int ix2, int ix3, int level, int levelDepth, std::vector<SPtr<Block3D>> &blocks)
 {
-    for (int dir = D3Q27System::STARTDIR; dir <= D3Q27System::ENDDIR; dir++)
-    {
+   // Gather the neighbor blocks of every direction in FSTARTDIR..FENDDIR (the loop previously ran STARTDIR..ENDDIR).
+   for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++)
+   {
         this->getNeighborBlocksForDirection(dir, ix1, ix2, ix3, level, levelDepth, blocks);
     }
 }
@@ -1098,83 +1100,85 @@ void Grid3D::getNeighborsBottomSouthWest(int ix1, int ix2, int ix3, int level, i
 void Grid3D::getNeighborBlocksForDirection(int dir, int ix1, int ix2, int ix3, int level, int levelDepth,
                                            std::vector<SPtr<Block3D>> &blocks)
 {
+    using namespace vf::lbm::dir;
+
     switch (dir) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             this->getNeighborsEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             this->getNeighborsWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             this->getNeighborsNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             this->getNeighborsSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             this->getNeighborsTop(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             this->getNeighborsBottom(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             this->getNeighborsNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             this->getNeighborsSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             this->getNeighborsSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             this->getNeighborsNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             this->getNeighborsTopEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             this->getNeighborsBottomWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             this->getNeighborsBottomEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             this->getNeighborsTopWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             this->getNeighborsTopNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             this->getNeighborsBottomSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             this->getNeighborsBottomNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             this->getNeighborsTopSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             this->getNeighborsTopNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             this->getNeighborsTopNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             this->getNeighborsTopSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             this->getNeighborsTopSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             this->getNeighborsBottomNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             this->getNeighborsBottomNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             this->getNeighborsBottomSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             this->getNeighborsBottomSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
         default:
@@ -1261,86 +1265,88 @@ void Grid3D::getSubBlocksZero(int ix1, int ix2, int ix3, int level, vector<SPtr<
 void Grid3D::getNeighborBlocksForDirectionWithREST(int dir, int ix1, int ix2, int ix3, int level, int levelDepth,
                                                       std::vector<SPtr<Block3D>> &blocks)
 {
+    using namespace vf::lbm::dir;
+
     switch (dir) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             this->getNeighborsEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             this->getNeighborsWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             this->getNeighborsNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             this->getNeighborsSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             this->getNeighborsTop(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             this->getNeighborsBottom(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             this->getNeighborsNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             this->getNeighborsSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             this->getNeighborsSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             this->getNeighborsNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             this->getNeighborsTopEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             this->getNeighborsBottomWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             this->getNeighborsBottomEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             this->getNeighborsTopWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             this->getNeighborsTopNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             this->getNeighborsBottomSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             this->getNeighborsBottomNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             this->getNeighborsTopSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             this->getNeighborsTopNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             this->getNeighborsTopNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             this->getNeighborsTopSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             this->getNeighborsTopSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             this->getNeighborsBottomNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             this->getNeighborsBottomNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             this->getNeighborsBottomSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             this->getNeighborsBottomSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_000:
+        case DIR_000:
             this->getNeighborsZero(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
         default:
@@ -2025,7 +2031,7 @@ int Grid3D::getNumberOfBlocks()
 //////////////////////////////////////////////////////////////////////////
 int Grid3D::getNumberOfBlocks(int level) { return (int)levelSet[level].size(); }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+void Grid3D::getBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                                std::vector<SPtr<Block3D>> &blocks)
 {
     int coarsestLevel = this->getCoarsestInitializedLevel();
@@ -2035,9 +2041,9 @@ void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double
     // MINIMALE BLOCK-INDIZES BESTIMMEN
     //
     // min:
-    double dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
 
     // Achtung, wenn minX1 genau auf grenze zwischen zwei bloecken -> der "kleinere" muss genommen werden,
     // da beim Transformieren der "groessere" Index rauskommt
@@ -2062,9 +2068,9 @@ void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double
     std::set<SPtr<Block3D>> blockset;
     for (int level = coarsestLevel; level <= finestLevel; level++) {
         // damit bei negativen werten auch der "kleinere" genommen wird -> floor!
-        int minx1 = (int)std::floor((double)iMinX1 / (1 << (finestLevel - level)));
-        int minx2 = (int)std::floor((double)iMinX2 / (1 << (finestLevel - level)));
-        int minx3 = (int)std::floor((double)iMinX3 / (1 << (finestLevel - level)));
+        int minx1 = (int)std::floor((real)iMinX1 / (1 << (finestLevel - level)));
+        int minx2 = (int)std::floor((real)iMinX2 / (1 << (finestLevel - level)));
+        int minx3 = (int)std::floor((real)iMinX3 / (1 << (finestLevel - level)));
 
         int maxx1 = iMaxX1 / (1 << (finestLevel - level));
         int maxx2 = iMaxX2 / (1 << (finestLevel - level));
@@ -2084,16 +2090,16 @@ void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double
     std::copy(blockset.begin(), blockset.end(), blocks.begin());
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::getBlocksByCuboid(int level, double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                               double maxX3, std::vector<SPtr<Block3D>> &blocks)
+void Grid3D::getBlocksByCuboid(int level, real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                               real maxX3, std::vector<SPtr<Block3D>> &blocks)
 {
     //////////////////////////////////////////////////////////////////////////
     // MINIMALE BLOCK-INDIZES BESTIMMEN
     //
     // min:
-    double dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << level);
-    double dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << level);
-    double dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << level);
+    real dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << level);
+    real dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << level);
+    real dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << level);
 
     // Achtung, wenn minX1 genau auf grenze zwischen zwei bloecken -> der "kleinere" muss genommen werden:
     int iMinX1 = (int)dMinX1;
@@ -2128,7 +2134,7 @@ void Grid3D::getBlocksByCuboid(int level, double minX1, double minX2, double min
     std::copy(blockset.begin(), blockset.end(), blocks.begin());
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+void Grid3D::getAllBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                                   std::vector<SPtr<Block3D>> &blocks)
 {
     int coarsestLevel = this->getCoarsestInitializedLevel();
@@ -2138,9 +2144,9 @@ void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, doub
     // MINIMALE BLOCK-INDIZES BESTIMMEN
     //
     // min:
-    double dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
 
     // Achtung, wenn minX1 genau auf grenze zwischen zwei bloecken -> der "kleinere" muss genommen werden,
     // da beim Transformieren der "groessere" Index rauskommt
@@ -2165,9 +2171,9 @@ void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, doub
     std::set<SPtr<Block3D>> blockset;
     for (int level = coarsestLevel; level <= finestLevel; level++) {
         // damit bei negativen werten auch der "kleinere" genommen wird -> floor!
-        int minx1 = (int)std::floor((double)iMinX1 / (1 << (finestLevel - level)));
-        int minx2 = (int)std::floor((double)iMinX2 / (1 << (finestLevel - level)));
-        int minx3 = (int)std::floor((double)iMinX3 / (1 << (finestLevel - level)));
+        int minx1 = (int)std::floor((real)iMinX1 / (1 << (finestLevel - level)));
+        int minx2 = (int)std::floor((real)iMinX2 / (1 << (finestLevel - level)));
+        int minx3 = (int)std::floor((real)iMinX3 / (1 << (finestLevel - level)));
 
         int maxx1 = iMaxX1 / (1 << (finestLevel - level));
         int maxx2 = iMaxX2 / (1 << (finestLevel - level));
@@ -2187,25 +2193,25 @@ void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, doub
     std::copy(blockset.begin(), blockset.end(), blocks.begin());
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::calcStartCoordinatesAndDelta(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3,
-                                          double &deltaX)
+void Grid3D::calcStartCoordinatesAndDelta(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3,
+                                          real &deltaX) // all four reference parameters are outputs and get overwritten
 {
     int blocklevel = block->getLevel();
     worldX1        = block->getX1() / (float)(1 << blocklevel);
     worldX2        = block->getX2() / (float)(1 << blocklevel);
     worldX3        = block->getX3() / (float)(1 << blocklevel);
-    deltaX         = (double)1.0 / (double)(this->blockNx1 * (double)(1 << blocklevel));
+    deltaX         = (real)1.0 / (real)(this->blockNx1 * (real)(1 << blocklevel)); // 1 / (blockNx1 * 2^blocklevel); NOTE(review): worldX1..3 above still divide by (float), not real - confirm intended
 
     if (this->trafo) {
-        double x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3;
+        real x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3; // copies, because worldX1..3 are overwritten one by one below
         worldX1 = this->trafo->transformBackwardToX1Coordinate(x1tmp, x2tmp, x3tmp);
         worldX2 = this->trafo->transformBackwardToX2Coordinate(x1tmp, x2tmp, x3tmp);
         worldX3 = this->trafo->transformBackwardToX3Coordinate(x1tmp, x2tmp, x3tmp);
-        deltaX  = this->trafo->getX1CoordinateScaling() / (double)(this->blockNx1 * (double)(1 << blocklevel));
+        deltaX  = this->trafo->getX1CoordinateScaling() / (real)(this->blockNx1 * (real)(1 << blocklevel)); // X1 coordinate scaling / (blockNx1 * 2^blocklevel)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3)
+void Grid3D::calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3)
 {
     int blocklevel = block->getLevel();
     worldX1        = block->getX1() / (float)(1 << blocklevel);
@@ -2213,7 +2219,7 @@ void Grid3D::calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, double &wor
     worldX3        = block->getX3() / (float)(1 << blocklevel);
 
     if (this->trafo) {
-        double x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3;
+        real x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3;
         worldX1 = this->trafo->transformBackwardToX1Coordinate(x1tmp, x2tmp, x3tmp);
         worldX2 = this->trafo->transformBackwardToX2Coordinate(x1tmp, x2tmp, x3tmp);
         worldX3 = this->trafo->transformBackwardToX3Coordinate(x1tmp, x2tmp, x3tmp);
@@ -2227,12 +2233,12 @@ int Grid3D::getGhostLayerWidth() const
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::setGhostLayerWidth(int ghostLayerWidth)
 {
-    this->offset = static_cast<double>(ghostLayerWidth) - 0.5;
+    this->offset = static_cast<real>(ghostLayerWidth) - 0.5;
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::setTimeStep(double step) { timeStep = step; }
+void Grid3D::setTimeStep(real step) { timeStep = step; }
 //////////////////////////////////////////////////////////////////////////
-double Grid3D::getTimeStep() const { return timeStep; }
+real Grid3D::getTimeStep() const { return timeStep; }
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::fillExtentWithBlocks(UbTupleInt3 minInd, UbTupleInt3 maxInd)
 {
diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
index fabaaa655e2b63201256802473ec037279f0ea3b..41a99d6cc7be5177cc0f3ff2e89591b28317fede 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
+++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
@@ -41,6 +41,7 @@
 #include <basics/utilities/UbKeys.h>
 #include <basics/utilities/UbTuple.h>
 #include <basics/utilities/Vector3D.h>
+#include "lbm/constants/D3Q27.h"
 
 class CoordinateTransformation3D;
 
@@ -77,11 +78,11 @@ public:
     void replaceBlock(SPtr<Block3D> block);
     SPtr<Block3D> getBlock(int ix1, int ix2, int ix3, int level) const;
     SPtr<Block3D> getBlock(int id) const;
-    void getBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+    void getBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                            std::vector<SPtr<Block3D>> &blocks);
-    void getBlocksByCuboid(int level, double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                           double maxX3, std::vector<SPtr<Block3D>> &blocks);
-    void getAllBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+    void getBlocksByCuboid(int level, real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                           real maxX3, std::vector<SPtr<Block3D>> &blocks);
+    void getAllBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                               std::vector<SPtr<Block3D>> &blocks);
     //! get blocks for level
     void getBlocks(int level, std::vector<SPtr<Block3D>> &blockVector);
@@ -166,7 +167,7 @@ public:
     //////////////////////////////////////////////////////////////////////////
     // interactors control
     void addInteractor(SPtr<Interactor3D> interactor);
-    void addAndInitInteractor(SPtr<Interactor3D> interactor, double timestep = 0);
+    void addAndInitInteractor(SPtr<Interactor3D> interactor, real timestep = 0);
     Interactor3DSet getInteractors();
     //////////////////////////////////////////////////////////////////////////
     // visitors
@@ -189,19 +190,19 @@ public:
     void setPeriodicX3(bool value);
     //////////////////////////////////////////////////////////////////////////
     // Topology
-    UbTupleInt3 getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord) const;
-    UbTupleInt3 getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord, int level) const;
+    UbTupleInt3 getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord) const;
+    UbTupleInt3 getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord, int level) const;
     UbTupleDouble3 getBlockLengths(SPtr<Block3D> block) const;
     UbTupleDouble6 getBlockOversize() const;
     void setCoordinateTransformator(SPtr<CoordinateTransformation3D> trafo);
     const SPtr<CoordinateTransformation3D> getCoordinateTransformator() const;
-    void setDeltaX(double dx);
-    void setDeltaX(double worldUnit, double gridUnit);
-    double getDeltaX(int level) const;
-    double getDeltaX(SPtr<Block3D> block) const;
+    void setDeltaX(real dx);
+    void setDeltaX(real worldUnit, real gridUnit);
+    real getDeltaX(int level) const;
+    real getDeltaX(SPtr<Block3D> block) const;
     UbTupleDouble3 getNodeOffset(SPtr<Block3D> block) const;
     Vector3D getNodeCoordinates(SPtr<Block3D> block, int ix1, int ix2, int ix3) const;
-    UbTupleInt3 getNodeIndexes(SPtr<Block3D> block, double nodeX1Coord, double nodeX2Coord, double nodeX3Coord) const;
+    UbTupleInt3 getNodeIndexes(SPtr<Block3D> block, real nodeX1Coord, real nodeX2Coord, real nodeX3Coord) const;
     void setBlockNX(int nx1, int nx2, int nx3);
     UbTupleInt3 getBlockNX() const;
     UbTupleDouble3 getBlockWorldCoordinates(SPtr<Block3D> block) const;
@@ -212,16 +213,16 @@ public:
     int getNX1() const;
     int getNX2() const;
     int getNX3() const;
-    void calcStartCoordinatesAndDelta(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3, double &deltaX);
-    void calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3);
+    void calcStartCoordinatesAndDelta(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3, real &deltaX);
+    void calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3);
     int getGhostLayerWidth() const;
     void setGhostLayerWidth(int ghostLayerWidth);
     //////////////////////////////////////////////////////////////////////////
     // LBM
     // double getDeltaT(SPtr<Block3D>) const;
     //////////////////////////////////////////////////////////////////////////
-    void setTimeStep(double step);
-    double getTimeStep() const;
+    void setTimeStep(real step);
+    real getTimeStep() const;
 
 protected:
     void checkLevel(int level);
@@ -309,11 +310,11 @@ private:
     int nx3{ 0 };
 
     SPtr<CoordinateTransformation3D> trafo;
-    double orgDeltaX{ 1.0 };
+    real orgDeltaX{ 1.0 };
 
-    double timeStep{ 0.0 };
+    real timeStep{ 0.0 };
 
-    double offset{ 0.5 };
+    real offset{ 0.5 };
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp
index a834466f85b85890bf7c1a5a264807e6a0b13ee1..06d0daffd5814f7956125c58c40e7122bf4878f0 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp
@@ -87,117 +87,119 @@ D3Q27Interactor::~D3Q27Interactor() = default;
 //////////////////////////////////////////////////////////////////////////
 void D3Q27Interactor::initRayVectors()
 {
+    using namespace vf::lbm::dir;
+
     int fdir;
-    double c1oS2 = UbMath::one_over_sqrt2;
-    double c1oS3 = UbMath::one_over_sqrt3;
-    fdir         = D3Q27System::DIR_P00;
+    real c1oS2 = vf::lbm::constant::one_over_sqrt2;
+    real c1oS3 = vf::lbm::constant::one_over_sqrt3;
+    fdir         = DIR_P00;
     rayX1[fdir]  = 1.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_M00;
+    fdir         = DIR_M00;
     rayX1[fdir]  = -1.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_0P0;
+    fdir         = DIR_0P0;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = 1.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_0M0;
+    fdir         = DIR_0M0;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = -1.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_00P;
+    fdir         = DIR_00P;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = 1.0;
-    fdir         = D3Q27System::DIR_00M;
+    fdir         = DIR_00M;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = -1.0;
-    fdir         = D3Q27System::DIR_PP0;
+    fdir         = DIR_PP0;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_MM0;
+    fdir         = DIR_MM0;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_PM0;
+    fdir         = DIR_PM0;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_MP0;
+    fdir         = DIR_MP0;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_P0P;
+    fdir         = DIR_P0P;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = c1oS2;
-    fdir         = D3Q27System::DIR_M0M;
+    fdir         = DIR_M0M;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_P0M;
+    fdir         = DIR_P0M;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_M0P;
+    fdir         = DIR_M0P;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = c1oS2;
-    fdir         = D3Q27System::DIR_0PP;
+    fdir         = DIR_0PP;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = c1oS2;
-    fdir         = D3Q27System::DIR_0MM;
+    fdir         = DIR_0MM;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_0PM;
+    fdir         = DIR_0PM;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_0MP;
+    fdir         = DIR_0MP;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = c1oS2;
 
-    fdir        = D3Q27System::DIR_MPP;
+    fdir        = DIR_MPP;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_PPP;
+    fdir        = DIR_PPP;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_MMP;
+    fdir        = DIR_MMP;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_PMP;
+    fdir        = DIR_PMP;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_MPM;
+    fdir        = DIR_MPM;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = -c1oS3;
-    fdir        = D3Q27System::DIR_PPM;
+    fdir        = DIR_PPM;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = -c1oS3;
-    fdir        = D3Q27System::DIR_MMM;
+    fdir        = DIR_MMM;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = -c1oS3;
-    fdir        = D3Q27System::DIR_PMM;
+    fdir        = DIR_PMM;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = -c1oS3;
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27Interactor::initInteractor(const double &timeStep)
+void D3Q27Interactor::initInteractor(const real &timeStep)
 {
     UBLOG(logDEBUG5, "D3Q27Interactor::initInteractor - "
                          << " for timestep = " << timeStep);
@@ -222,7 +224,7 @@ void D3Q27Interactor::initInteractor(const double &timeStep)
     updateBlocks();
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27Interactor::updateInteractor(const double &timestep)
+void D3Q27Interactor::updateInteractor(const real &timestep)
 {
     UBLOG(logDEBUG5, "D3Q27Interactor::updateInteractor - for timestep = " << timestep);
 
@@ -262,9 +264,9 @@ void D3Q27Interactor::updateInteractor(const double &timestep)
             int x2          = (*setPos)[1];
             int x3          = (*setPos)[2];
             Vector3D coords = grid.lock()->getNodeCoordinates(block, x1, x2, x3);
-            double worldX1  = coords[0];
-            double worldX2  = coords[1];
-            double worldX3  = coords[2];
+            real worldX1  = coords[0];
+            real worldX2  = coords[1];
+            real worldX3  = coords[2];
 
             SPtr<BoundaryConditions> bc = bcArray->getBC(x1, x2, x3);
             if (bc) // may be that the BC has been deleted by the solid setting of another interactor
@@ -282,6 +284,8 @@ void D3Q27Interactor::updateInteractor(const double &timestep)
 // extendedBoundingGeoOfGeoObject MUST already have been magnified by delta_x_level in each direction for SOLID
 bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     if (!block)
         return false;
 
@@ -293,7 +297,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
     solidNodeIndicesMap[block]              = set<UbTupleInt3>();
     set<UbTupleInt3> &solidNodeIndices      = solidNodeIndicesMap[block];
 
-    double timestep    = 0;
+    real timestep    = 0;
     bool oneEntryGotBC = false;
     bool gotQs         = false;
     SPtr<BoundaryConditions> bc;
@@ -301,7 +305,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
     SPtr<ILBMKernel> kernel = block->getKernel();
     SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
 
-    double internX1, internX2, internX3;
+    real internX1, internX2, internX3;
 
     int startIX1 = 0;
     int startIX2 = 0;
@@ -310,7 +314,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
     int stopIX2  = (int)bcArray->getNX2();
     int stopIX3  = (int)bcArray->getNX3();
 
-    double dx = grid.lock()->getDeltaX(block);
+    real dx = grid.lock()->getDeltaX(block);
 
     // other boundingRect than in init, because here the boundrect has to be increased by one dx
     GbCuboid3D extendedBoundingGeoOfGeoObject(
@@ -318,7 +322,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
         geoObject3D->getX3Minimum() - 1.02 * dx, geoObject3D->getX1Maximum() + 1.02 * dx,
         geoObject3D->getX2Maximum() + 1.02 * dx, geoObject3D->getX3Maximum() + 1.02 * dx);
 
-    double deltaX1 = dx, deltaX2 = dx, deltaX3 = dx;
+    real deltaX1 = dx, deltaX2 = dx, deltaX3 = dx;
 
     if (geoObject3D->hasRaytracing() || (this->isInverseSolid() && geoObject3D->raytracingSupportsPointsInside())) {
         // if deltaX1==deltaX2==deltaX3 (must for LB!!)
@@ -326,20 +330,20 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
             throw UbException(
                 UB_EXARGS, "fuer den bei LB nicht vorkommenden Fall deltaX1!=deltaX2!=deltaX3  nicht implementiert ");
 
-        vector<double> distNeigh(D3Q27System::FENDDIR + 1, UbMath::sqrt2 * deltaX1);
-        distNeigh[D3Q27System::DIR_P00] = distNeigh[D3Q27System::DIR_M00] = distNeigh[D3Q27System::DIR_0P0] = deltaX1;
-        distNeigh[D3Q27System::DIR_0M0] = distNeigh[D3Q27System::DIR_00P] = distNeigh[D3Q27System::DIR_00M] = deltaX1;
-        distNeigh[D3Q27System::DIR_PP0] = distNeigh[D3Q27System::DIR_MP0] = distNeigh[D3Q27System::DIR_MM0] =
-            distNeigh[D3Q27System::DIR_PM0]                          = UbMath::sqrt2 * deltaX1;
-        distNeigh[D3Q27System::DIR_P0P] = distNeigh[D3Q27System::DIR_0PP] = distNeigh[D3Q27System::DIR_M0P] =
-            distNeigh[D3Q27System::DIR_0MP]                          = UbMath::sqrt2 * deltaX1;
-        distNeigh[D3Q27System::DIR_P0M] = distNeigh[D3Q27System::DIR_0PM] = distNeigh[D3Q27System::DIR_M0M] =
-            distNeigh[D3Q27System::DIR_0MM]                          = UbMath::sqrt2 * deltaX1;
-        distNeigh[D3Q27System::DIR_PPP] = distNeigh[D3Q27System::DIR_MPP] = distNeigh[D3Q27System::DIR_PMP] =
-            distNeigh[D3Q27System::DIR_MMP]                           = UbMath::sqrt3 * deltaX1;
-        distNeigh[D3Q27System::DIR_PPM] = distNeigh[D3Q27System::DIR_MPM] = distNeigh[D3Q27System::DIR_PMM] =
-            distNeigh[D3Q27System::DIR_MMM]                           = UbMath::sqrt3 * deltaX1;
-        double q;
+        vector<real> distNeigh(D3Q27System::FENDDIR + 1, vf::lbm::constant::sqrt2 * deltaX1);
+        distNeigh[DIR_P00] = distNeigh[DIR_M00] = distNeigh[DIR_0P0] = deltaX1;
+        distNeigh[DIR_0M0] = distNeigh[DIR_00P] = distNeigh[DIR_00M] = deltaX1;
+        distNeigh[DIR_PP0] = distNeigh[DIR_MP0] = distNeigh[DIR_MM0] =
+            distNeigh[DIR_PM0]             = vf::lbm::constant::sqrt2 * deltaX1;
+        distNeigh[DIR_P0P] = distNeigh[DIR_0PP] = distNeigh[DIR_M0P] =
+            distNeigh[DIR_0MP]             = vf::lbm::constant::sqrt2 * deltaX1;
+        distNeigh[DIR_P0M] = distNeigh[DIR_0PM] = distNeigh[DIR_M0M] =
+            distNeigh[DIR_0MM]             = vf::lbm::constant::sqrt2 * deltaX1;
+        distNeigh[DIR_PPP] = distNeigh[DIR_MPP] = distNeigh[DIR_PMP] =
+            distNeigh[DIR_MMP]              = vf::lbm::constant::sqrt3 * deltaX1;
+        distNeigh[DIR_PPM] = distNeigh[DIR_MPM] = distNeigh[DIR_PMM] =
+            distNeigh[DIR_MMM]              = vf::lbm::constant::sqrt3 * deltaX1;
+        real q;
         bool pointOnBoundary = false;
 
         //#ifdef _OPENMP
@@ -487,19 +491,19 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
 
                         GbPoint3D pointA(internX1, internX2, internX3);
                         for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
-                            double x1B = internX1 + D3Q27System::DX1[fdir] * deltaX1;
-                            double x2B = internX2 + D3Q27System::DX2[fdir] * deltaX2;
-                            double x3B = internX3 + D3Q27System::DX3[fdir] * deltaX3;
+                            real x1B = internX1 + D3Q27System::DX1[fdir] * deltaX1;
+                            real x2B = internX2 + D3Q27System::DX2[fdir] * deltaX2;
+                            real x3B = internX3 + D3Q27System::DX3[fdir] * deltaX3;
 
                             GbPoint3D pointB(x1B, x2B, x3B);
                             GbLine3D *clippedLine = this->geoObject3D->createClippedLine3D(pointA, pointB);
 
                             if (clippedLine) {
-                                double q = 0.0;
+                                real q = 0.0;
                                 if (!this->isInverseSolid()) // A is outside
                                 {
-                                    double distanceAB = pointA.getDistance(&pointB); // pointA to B
-                                    double distanceAP = UbMath::min(pointA.getDistance(clippedLine->getPoint1()),
+                                    real distanceAB = pointA.getDistance(&pointB); // pointA to B
+                                    real distanceAP = UbMath::min(pointA.getDistance(clippedLine->getPoint1()),
                                                                     pointA.getDistance(clippedLine->getPoint2()));
                                     q                 = distanceAP / distanceAB;
                                 } else {
@@ -507,8 +511,8 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
                                     if (!clippedLine->getPoint1()->equals(&pointB) &&
                                         !clippedLine->getPoint2()->equals(&pointB)) {
                                         // A is inside, a clipped line must not contain B
-                                        double distanceAB = pointA.getDistance(&pointB); // pointA to B
-                                        double distanceAP = clippedLine->getLength();
+                                        real distanceAB = pointA.getDistance(&pointB); // pointA to B
+                                        real distanceAP = clippedLine->getLength();
                                         q                 = distanceAP / distanceAB;
                                     } else if (this->geoObject3D->isPointInGbObject3D(
                                                    pointB.getX1Coordinate(), pointB.getX2Coordinate(),
@@ -569,11 +573,13 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
 //////////////////////////////////////////////////////////////////////////
 void D3Q27Interactor::addQsLineSet(std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt2> &lines)
 {
+    using namespace vf::lbm::dir;
+
     for (SPtr<Block3D> block : bcBlocks) {
         if (!block)
             continue;
 
-        double dx               = grid.lock()->getDeltaX(block);
+        real dx               = grid.lock()->getDeltaX(block);
         UbTupleDouble3 orgDelta = grid.lock()->getNodeOffset(block);
 
         SPtr<ILBMKernel> kernel = block->getKernel();
@@ -603,142 +609,142 @@ void D3Q27Interactor::addQsLineSet(std::vector<UbTupleFloat3> &nodes, std::vecto
                     continue;
                 SPtr<BoundaryConditions> bc = bcArray->getBC(ix1, ix2, ix3);
 
-                double x1a = val<1>(blockOrg) - val<1>(orgDelta) + ix1 * dx;
-                double x2a = val<2>(blockOrg) - val<2>(orgDelta) + ix2 * dx;
-                double x3a = val<3>(blockOrg) - val<3>(orgDelta) + ix3 * dx;
+                real x1a = val<1>(blockOrg) - val<1>(orgDelta) + ix1 * dx;
+                real x2a = val<2>(blockOrg) - val<2>(orgDelta) + ix2 * dx;
+                real x3a = val<3>(blockOrg) - val<3>(orgDelta) + ix3 * dx;
                 nodes.push_back(makeUbTuple((float)x1a, (float)x2a, (float)x3a));
                 node1Index = nodes.size() - 1;
 
                 for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++) {
                     if (bc->hasBoundaryConditionFlag(D3Q27System::INVDIR[dir])) {
-                        double x1b, x2b, x3b, q = bc->getQ(dir);
+                        real x1b, x2b, x3b, q = bc->getQ(dir);
                         switch (dir) {
-                            case D3Q27System::DIR_P00:
+                            case DIR_P00:
                                 x1b = x1a + q * dx;
                                 x2b = x2a;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_0P0:
+                            case DIR_0P0:
                                 x1b = x1a;
                                 x2b = x2a + q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_M00:
+                            case DIR_M00:
                                 x1b = x1a - q * dx;
                                 x2b = x2a;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_0M0:
+                            case DIR_0M0:
                                 x1b = x1a;
                                 x2b = x2a - q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_PP0:
+                            case DIR_PP0:
                                 x1b = x1a + q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_MP0:
+                            case DIR_MP0:
                                 x1b = x1a - q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_MM0:
+                            case DIR_MM0:
                                 x1b = x1a - q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_PM0:
+                            case DIR_PM0:
                                 x1b = x1a + q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_00P:
+                            case DIR_00P:
                                 x1b = x1a;
                                 x2b = x2a;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_P0P:
+                            case DIR_P0P:
                                 x1b = x1a + q * dx;
                                 x2b = x2a;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_0PP:
+                            case DIR_0PP:
                                 x1b = x1a;
                                 x2b = x2a + q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_M0P:
+                            case DIR_M0P:
                                 x1b = x1a - q * dx;
                                 x2b = x2a;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_0MP:
+                            case DIR_0MP:
                                 x1b = x1a;
                                 x2b = x2a - q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_00M:
+                            case DIR_00M:
                                 x1b = x1a;
                                 x2b = x2a;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_P0M:
+                            case DIR_P0M:
                                 x1b = x1a + q * dx;
                                 x2b = x2a;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_0PM:
+                            case DIR_0PM:
                                 x1b = x1a;
                                 x2b = x2a + q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_M0M:
+                            case DIR_M0M:
                                 x1b = x1a - q * dx;
                                 x2b = x2a;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_0MM:
+                            case DIR_0MM:
                                 x1b = x1a;
                                 x2b = x2a - q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_PPP:
+                            case DIR_PPP:
                                 x1b = x1a + q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_MMM:
+                            case DIR_MMM:
                                 x1b = x1a - q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_PPM:
+                            case DIR_PPM:
                                 x1b = x1a + q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_MMP:
+                            case DIR_MMP:
                                 x1b = x1a - q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_PMP:
+                            case DIR_PMP:
                                 x1b = x1a + q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_MPM:
+                            case DIR_MPM:
                                 x1b = x1a - q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_PMM:
+                            case DIR_PMM:
                                 x1b = x1a + q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_MPP:
+                            case DIR_MPP:
                                 x1b = x1a - q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a + q * dx;
@@ -760,6 +766,8 @@ void D3Q27Interactor::addQsLineSet(std::vector<UbTupleFloat3> &nodes, std::vecto
 ////////////////////////////////////////////////////////////////////////////
 vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
 {
+    using namespace vf::lbm::dir;
+
     vector<pair<GbPoint3D, GbPoint3D>> QsLineSet;
     pair<GbPoint3D, GbPoint3D> pointpair;
 
@@ -774,7 +782,7 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
         SPtr<BCArray3D> bcMatrix  = kernel->getBCProcessor()->getBCArray();
         UbTupleDouble3 nodeOffset = grid.lock()->getNodeOffset(block);
 
-        // Check whether top row is double in the system or not
+        // Check whether top row is duplicated in the system or not
         bool include_N_Face  = false; // x1=[0..blocknx1[ && x3=[0..blocknx3[
         bool include_E_Face  = false; // x2=[0..blocknx2[ && x3=[0..blocknx3[
         bool include_T_Face  = false; // x1=[0..blocknx1[ && x2=[0..blocknx2[
@@ -782,17 +790,17 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
         bool include_TN_Edge = false; //(x1/x2/x3)=([0..blocknx1[/blocknx2/blocknx1)
         bool include_TE_Edge = false; //(x1/x2/x3)=(blocknx1/[0..blocknx2[/blocknx2)
         if (block) {
-            if (!block->getConnector(D3Q27System::DIR_0P0))
+            if (!block->getConnector(DIR_0P0))
                 include_N_Face = true;
-            if (!block->getConnector(D3Q27System::DIR_P00))
+            if (!block->getConnector(DIR_P00))
                 include_E_Face = true;
-            if (!block->getConnector(D3Q27System::DIR_00P))
+            if (!block->getConnector(DIR_00P))
                 include_T_Face = true;
-            if (!block->getConnector(D3Q27System::DIR_PP0) && include_N_Face && include_E_Face)
+            if (!block->getConnector(DIR_PP0) && include_N_Face && include_E_Face)
                 include_NE_Edge = true;
-            if (!block->getConnector(D3Q27System::DIR_0PP) && include_T_Face && include_N_Face)
+            if (!block->getConnector(DIR_0PP) && include_T_Face && include_N_Face)
                 include_TN_Edge = true;
-            if (!block->getConnector(D3Q27System::DIR_P0P) && include_T_Face && include_E_Face)
+            if (!block->getConnector(DIR_P0P) && include_T_Face && include_E_Face)
                 include_TE_Edge = true;
         }
 
@@ -802,7 +810,7 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
         set<std::vector<int>> &transNodeIndicesSet = pos->second;
         set<std::vector<int>>::iterator setPos;
 
-        double x1, x2, x3, dx;
+        real x1, x2, x3, dx;
         grid.lock()->calcStartCoordinatesAndDelta(block, x1, x2, x3, dx);
 
         for (setPos = transNodeIndicesSet.begin(); setPos != transNodeIndicesSet.end(); ++setPos) {
@@ -824,142 +832,142 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
                     if (!bcMatrix->hasBC(ix1, ix2, ix3))
                         continue;
                     SPtr<BoundaryConditions> bc = bcMatrix->getBC(ix1, ix2, ix3);
-                    double x1a                  = x1 - val<1>(nodeOffset) + dx * ix1;
-                    double x2a                  = x2 - val<2>(nodeOffset) + dx * ix2;
-                    double x3a                  = x3 - val<3>(nodeOffset) + dx * ix3;
+                    real x1a                  = x1 - val<1>(nodeOffset) + dx * ix1;
+                    real x2a                  = x2 - val<2>(nodeOffset) + dx * ix2;
+                    real x3a                  = x3 - val<3>(nodeOffset) + dx * ix3;
                     pointpair.first.setX1(x1a);
                     pointpair.first.setX2(x2a);
                     pointpair.first.setX3(x3a);
                     for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++) {
                         if (bc->hasBoundaryConditionFlag(D3Q27System::INVDIR[dir])) {
-                            double x1b, x2b, x3b, q = bc->getQ(dir);
+                            real x1b, x2b, x3b, q = bc->getQ(dir);
                             switch (dir) {
-                                case D3Q27System::DIR_P00:
+                                case DIR_P00:
                                     x1b = x1a + q * dx;
                                     x2b = x2a;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_0P0:
+                                case DIR_0P0:
                                     x1b = x1a;
                                     x2b = x2a + q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_M00:
+                                case DIR_M00:
                                     x1b = x1a - q * dx;
                                     x2b = x2a;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_0M0:
+                                case DIR_0M0:
                                     x1b = x1a;
                                     x2b = x2a - q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_PP0:
+                                case DIR_PP0:
                                     x1b = x1a + q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_MP0:
+                                case DIR_MP0:
                                     x1b = x1a - q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_MM0:
+                                case DIR_MM0:
                                     x1b = x1a - q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_PM0:
+                                case DIR_PM0:
                                     x1b = x1a + q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_00P:
+                                case DIR_00P:
                                     x1b = x1a;
                                     x2b = x2a;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_P0P:
+                                case DIR_P0P:
                                     x1b = x1a + q * dx;
                                     x2b = x2a;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_0PP:
+                                case DIR_0PP:
                                     x1b = x1a;
                                     x2b = x2a + q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_M0P:
+                                case DIR_M0P:
                                     x1b = x1a - q * dx;
                                     x2b = x2a;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_0MP:
+                                case DIR_0MP:
                                     x1b = x1a;
                                     x2b = x2a - q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_00M:
+                                case DIR_00M:
                                     x1b = x1a;
                                     x2b = x2a;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_P0M:
+                                case DIR_P0M:
                                     x1b = x1a + q * dx;
                                     x2b = x2a;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_0PM:
+                                case DIR_0PM:
                                     x1b = x1a;
                                     x2b = x2a + q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_M0M:
+                                case DIR_M0M:
                                     x1b = x1a - q * dx;
                                     x2b = x2a;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_0MM:
+                                case DIR_0MM:
                                     x1b = x1a;
                                     x2b = x2a - q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_PPP:
+                                case DIR_PPP:
                                     x1b = x1a + q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_MMM:
+                                case DIR_MMM:
                                     x1b = x1a - q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_PPM:
+                                case DIR_PPM:
                                     x1b = x1a + q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_MMP:
+                                case DIR_MMP:
                                     x1b = x1a - q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_PMP:
+                                case DIR_PMP:
                                     x1b = x1a + q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_MPM:
+                                case DIR_MPM:
                                     x1b = x1a - q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_PMM:
+                                case DIR_PMM:
                                     x1b = x1a + q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_MPP:
+                                case DIR_MPP:
                                     x1b = x1a - q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a + q * dx;
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h
index 4e588e96adbd42102a38cf3ee8ec27cd49e87dbf..80a58efccb36588111ddf5301f3fb68068e20958 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h
@@ -75,8 +75,8 @@ public:
     virtual void addBCAdapter(const SPtr<BCAdapter> bcAdapter) { bcAdapters.push_back(bcAdapter); }
     void deleteBCAdapter() { bcAdapters.clear(); }
 
-    void initInteractor(const double &timeStep = 0) override;
-    void updateInteractor(const double &timestep = 0) override;
+    void initInteractor(const real &timeStep = 0) override;
+    void updateInteractor(const real &timestep = 0) override;
 
     void setReinitWithStoredQs(bool reinitWithStoredQsFlag) { this->reinitWithStoredQsFlag = reinitWithStoredQsFlag; }
 
@@ -112,9 +112,9 @@ protected:
     BcNodeIndicesMap bcNodeIndicesMap;
 
     void initRayVectors();
-    double rayX1[D3Q27System::FENDDIR + 1];
-    double rayX2[D3Q27System::FENDDIR + 1];
-    double rayX3[D3Q27System::FENDDIR + 1];
+    real rayX1[D3Q27System::FENDDIR + 1];
+    real rayX2[D3Q27System::FENDDIR + 1];
+    real rayX3[D3Q27System::FENDDIR + 1];
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
index c7b13d3834a6bb822b945f9425ba21b8d5be399d..d569d3c0f16b24ea7592c09b26a06b8be69e126c 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
@@ -52,7 +52,7 @@ D3Q27TriFaceMeshInteractor::D3Q27TriFaceMeshInteractor(SPtr<GbTriFaceMesh3D> tri
 //////////////////////////////////////////////////////////////////////////
 D3Q27TriFaceMeshInteractor::~D3Q27TriFaceMeshInteractor() = default;
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::initInteractor(const double &timeStep)
+void D3Q27TriFaceMeshInteractor::initInteractor(const real &timeStep)
 {
     updateBlocks(); 
     setQs(timeStep);
@@ -77,7 +77,7 @@ bool D3Q27TriFaceMeshInteractor::setDifferencesToGbObject3D(const SPtr<Block3D>
     SPtr<ILBMKernel> kernel = block->getKernel();
     SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
 
-    double internX1, internX2, internX3;
+    real internX1, internX2, internX3;
 
     int startIX1 = 0, startIX2 = 0, startIX3 = 0;
     int stopIX1 = (int)bcArray->getNX1(), stopIX2 = (int)bcArray->getNX2(), stopIX3 = (int)bcArray->getNX3();
@@ -120,8 +120,10 @@ bool D3Q27TriFaceMeshInteractor::setDifferencesToGbObject3D(const SPtr<Block3D>
 }
 //////////////////////////////////////////////////////////////////////////
 // E.F. /4/16/2013
-void D3Q27TriFaceMeshInteractor::setQs(const double &timeStep)
+void D3Q27TriFaceMeshInteractor::setQs(const real &timeStep)
 {
+    using namespace vf::lbm::dir;
+
     UBLOGML(logDEBUG1, "\nLBMTriFaceMeshInteractor - setQs start ");
     if (!this->grid.lock())
         throw UbException(UB_EXARGS, "ups, no grid.lock()!!");
@@ -364,9 +366,9 @@ void D3Q27TriFaceMeshInteractor::setQs(const double &timeStep)
                 //            tmpSolidNodesFromOtherInteractors[block];
                 double q, distance;
 
-                double &nodeDx1 = nodeDeltaToNeigh[level][D3Q27System::DIR_P00];
-                double &nodeDx2 = nodeDeltaToNeigh[level][D3Q27System::DIR_0P0];
-                double &nodeDx3 = nodeDeltaToNeigh[level][D3Q27System::DIR_00P];
+                double &nodeDx1 = nodeDeltaToNeigh[level][DIR_P00];
+                double &nodeDx2 = nodeDeltaToNeigh[level][DIR_0P0];
+                double &nodeDx3 = nodeDeltaToNeigh[level][DIR_00P];
 
                 // fuer OBB-Test
                 double qEinflussDelta = 1.1 * sqrt(nodeDx1 * nodeDx1 + nodeDx2 * nodeDx2 + nodeDx3 * nodeDx3);
@@ -591,8 +593,10 @@ void D3Q27TriFaceMeshInteractor::setQs(const double &timeStep)
 //  1. fuer nicht markierte Bloecke genuegt EIN pointInObject(Dreicksnetz)-Test um den gesamten Block bei Erfolg als
 //  â€žnot activeâ€œ zu markieren
 //  2. fuer markiertre Bloecke wird ein rekursiver Fuellalgorithmus durchgefuehrt
-void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
+void D3Q27TriFaceMeshInteractor::initInteractor2(const real &timeStep)
 {
+    using namespace vf::lbm::dir;
+
     UBLOGML(logDEBUG1, "\nLBMTriFaceMeshInteractor - initInteractor start ");
     if (!this->grid.lock())
         throw UbException(UB_EXARGS, "ups, no grid.lock()!!");
@@ -736,7 +740,7 @@ void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
 
     // notwendige variablen initialisieren (u.a. blockDeltas des groben levels)
     float triPoints[3][3];
-    float vx1 = 0.0, vx2 = 0.0, vx3 = 0.0;
+    real vx1 = 0.0, vx2 = 0.0, vx3 = 0.0;
     unsigned counterTriBoxOverlap = 0, counterAABBTriFace = 0, counterHalfspace = 0, counterBilligOBB = 0;
     std::vector<GbTriFaceMesh3D::TriFace> &triangles = *mesh->getTriangles();
     std::vector<GbTriFaceMesh3D::Vertex> &nodes      = *mesh->getNodes();
@@ -880,9 +884,9 @@ void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
                 std::set<std::vector<int>> &solidsFromOtherInteractors = tmpSolidNodesFromOtherInteractors[block];
                 double q, internX1, internX2, internX3, distance;
 
-                double &nodeDx1 = nodeDeltaToNeigh[level][D3Q27System::DIR_P00];
-                double &nodeDx2 = nodeDeltaToNeigh[level][D3Q27System::DIR_0P0];
-                double &nodeDx3 = nodeDeltaToNeigh[level][D3Q27System::DIR_00P];
+                double &nodeDx1 = nodeDeltaToNeigh[level][DIR_P00];
+                double &nodeDx2 = nodeDeltaToNeigh[level][DIR_0P0];
+                double &nodeDx3 = nodeDeltaToNeigh[level][DIR_00P];
 
                 // fuer OBB-Test
                 double qEinflussDelta = 1.1 * sqrt(nodeDx1 * nodeDx1 + nodeDx2 * nodeDx2 + nodeDx3 * nodeDx3);
@@ -1181,9 +1185,9 @@ void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
 
                 std::set<UbTupleInt3> &solidNodeIndices = this->solidNodeIndicesMap[block];
 
-                float nodeDeltaX1 = (float)nodeDeltaToNeigh[level][D3Q27System::DIR_P00];
-                float nodeDeltaX2 = (float)nodeDeltaToNeigh[level][D3Q27System::DIR_0P0];
-                float nodeDeltaX3 = (float)nodeDeltaToNeigh[level][D3Q27System::DIR_00P];
+                float nodeDeltaX1 = (float)nodeDeltaToNeigh[level][DIR_P00];
+                float nodeDeltaX2 = (float)nodeDeltaToNeigh[level][DIR_0P0];
+                float nodeDeltaX3 = (float)nodeDeltaToNeigh[level][DIR_00P];
 
                 // flagfield matrix initialisieren
                 CbArray3D<FLAGS> flagField(blocknx1, blocknx2, blocknx3, UNDEF_FLAG);
@@ -1489,7 +1493,7 @@ void D3Q27TriFaceMeshInteractor::refineBlockGridToLevel(int level, double startD
     UBLOG(logDEBUG1, " - refine done");
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::updateMovedGeometry(const double &timeStep) {}
+void D3Q27TriFaceMeshInteractor::updateMovedGeometry(const real &timeStep) {}
 ////////////////////////////////////////////////////////////////////////////
 void D3Q27TriFaceMeshInteractor::recursiveGridFill(CbArray3D<FLAGS> &flagfield, const short &xs, const short &ys,
                                                    const short &zs, const FLAGS &type)
@@ -1561,9 +1565,9 @@ UbTupleDouble3 D3Q27TriFaceMeshInteractor::getForces()
     ////return getForcesTriangle();
     // this->calculateForces();
 
-    double forceX1 = 0.0;
-    double forceX2 = 0.0;
-    double forceX3 = 0.0;
+    real forceX1 = 0.0;
+    real forceX2 = 0.0;
+    real forceX3 = 0.0;
 
     // double area = 0.0;
 
@@ -1582,9 +1586,9 @@ UbTupleDouble3 D3Q27TriFaceMeshInteractor::getForces()
 //////////////////////////////////////////////////////////////////////////
 UbTupleDouble3 D3Q27TriFaceMeshInteractor::getForcesTriangle()
 {
-    double forceX1 = 0.0;
-    double forceX2 = 0.0;
-    double forceX3 = 0.0;
+    real forceX1 = 0.0;
+    real forceX2 = 0.0;
+    real forceX3 = 0.0;
 
     // D3Q19BlockGrid& grid.lock() = dynamic_cast<D3Q19BlockGrid&>(*this->grid.lock());
     ////   CoordinateTransformation3D *trafo = this->grid.lock()->getTransformation();
@@ -1829,7 +1833,7 @@ string D3Q27TriFaceMeshInteractor::toString()
     return ss.str();
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::reinitWithStoredQs(const double & /*timeStep*/)
+void D3Q27TriFaceMeshInteractor::reinitWithStoredQs(const real & /*timeStep*/)
 {
     // alle solid Bloecke wieder solid setzen
     std::vector<SPtr<Block3D>> &solidBlocks = this->getSolidBlockSet();
@@ -1902,7 +1906,7 @@ void D3Q27TriFaceMeshInteractor::reinitWithStoredQs(const double & /*timeStep*/)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::updateInteractor(const double &timestep)
+void D3Q27TriFaceMeshInteractor::updateInteractor(const real &timestep)
 {
     D3Q27Interactor::updateInteractor(timestep);
 }
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
index 9ac8bfc48a4fda3612b0781d93496cce723d2cd8..9e42de660502b33048abf31f5e831902134d3826 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
@@ -33,14 +33,14 @@ public:
 
     ~D3Q27TriFaceMeshInteractor() override;
 
-    void initInteractor(const double &timeStep = 0) override;
-    virtual void initInteractor2(const double &timeStep = 0);
+    void initInteractor(const real &timeStep = 0) override;
+    virtual void initInteractor2(const real &timeStep = 0);
 
-    void updateInteractor(const double &timestep = 0) override;
+    void updateInteractor(const real &timestep = 0) override;
 
-    void updateMovedGeometry(const double &timeStep = 0);
-    void setQs(const double &timeStep);
-    void refineBlockGridToLevel(int level, double startDistance, double stopDistance);
+    void updateMovedGeometry(const real &timeStep = 0);
+    void setQs(const real &timeStep);
+    void refineBlockGridToLevel(int level, real startDistance, real stopDistance);
 
     bool setDifferencesToGbObject3D(const SPtr<Block3D> block) override;
 
@@ -60,36 +60,36 @@ public:
     void calculateStresses();
     void calculateStressesAlternativ();
 
-    void calcStressesLine(UbTupleDouble6 &stresses, const double &weight, const UbTupleDouble6 &stvW,
+    void calcStressesLine(UbTupleDouble6 &stresses, const real &weight, const UbTupleDouble6 &stvW,
                           const UbTupleDouble6 &stvE);
-    void calcStressesFace(UbTupleDouble6 &stresses, const double &weightX, const double &weightY,
+    void calcStressesFace(UbTupleDouble6 &stresses, const real &weightX, const real &weightY,
                           const UbTupleDouble6 &stvSW, const UbTupleDouble6 &stvSE, const UbTupleDouble6 &stvNE,
                           const UbTupleDouble6 &stvNW);
-    void calcStressesCube(UbTupleDouble6 &stresses, const double &weightX, const double &weightY, const double &weightZ,
+    void calcStressesCube(UbTupleDouble6 &stresses, const real &weightX, const real &weightY, const real &weightZ,
                           const UbTupleDouble6 &stvBSW, const UbTupleDouble6 &stvBSE, const UbTupleDouble6 &stvBNE,
                           const UbTupleDouble6 &stvBNW, const UbTupleDouble6 &stvTSW, const UbTupleDouble6 &stvTSE,
                           const UbTupleDouble6 &stvTNE, const UbTupleDouble6 &stvTNW);
 
     void calculatePressure();
-    void calcPressureLine(double &p, const double &weight, const double &pW, const double &pE);
-    void calcPressureFace(double &p, const double &weightX, const double &weightY, const double &pSW, const double &pSE,
-                          const double &pNE, const double &pNW);
-    void calcPressureCube(double &p, const double &weightX, const double &weightY, const double &weightZ,
-                          const double &pBSW, const double &pBSE, const double &pBNE, const double &pBNW,
-                          const double &pTSW, const double &pTSE, const double &pTNE, const double &pTNW);
-
-    void setForceShift(double forceshift)
+    void calcPressureLine(real &p, const real &weight, const real &pW, const real &pE);
+    void calcPressureFace(real &p, const real &weightX, const real &weightY, const real &pSW, const real &pSE,
+                          const real &pNE, const real &pNW);
+    void calcPressureCube(real &p, const real &weightX, const real &weightY, const real &weightZ,
+                          const real &pBSW, const real &pBSE, const real &pBNE, const real &pBNW,
+                          const real &pTSW, const real &pTSE, const real &pTNE, const real &pTNW);
+
+    void setForceShift(real forceshift)
     {
         this->forceshift       = forceshift;
         this->forceshiftpolicy = true;
     }
-    void setVelocityShift(double velocityshift)
+    void setVelocityShift(real velocityshift)
     {
         this->velocityshift       = velocityshift;
         this->velocityshiftpolicy = true;
     }
-    double getForceShift() { return this->forceshift; }
-    double getVelocityShift() { return this->velocityshift; }
+    real getForceShift() { return this->forceshift; }
+    real getVelocityShift() { return this->velocityshift; }
     bool getForceShiftPolicy() { return forceshiftpolicy; }
     bool getVelocityShiftPolicy() { return velocityshiftpolicy; }
 
@@ -107,7 +107,7 @@ protected:
     bool useHalfSpace{ true };
     bool regardPIOTest{ true };
 
-    void reinitWithStoredQs(const double &timeStep);
+    void reinitWithStoredQs(const real &timeStep);
     //   bool reinitWithStoredQsFlag;
     std::map<SPtr<Block3D>, std::map<UbTupleInt3, std::vector<float>>>
         bcNodeIndicesAndQsMap; //!!! es kann sein, dass in diesem interactor
diff --git a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp
index 84526c62598b1d718b1f179228ae2a3f51839856..e08a0283339b6e5976c7439ea2b64142c00ef7c4 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp
@@ -34,7 +34,7 @@
 #include "Interactor3D.h"
 
 #include "UbException.h"
-#include <basics/utilities/UbMath.h>
+//#include <basics/utilities/UbMath.h>
 #include <fstream>
 #include <geometry3d/GbCuboid3D.h>
 
@@ -68,44 +68,44 @@ Interactor3D::Interactor3D(SPtr<GbObject3D> geoObject3D, SPtr<Grid3D> grid, int
 //////////////////////////////////////////////////////////////////////////
 Interactor3D::~Interactor3D() = default;
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::arePointsInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                            double maxX3, double delta)
+bool Interactor3D::arePointsInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                            real maxX3, real delta)
 {
     bool result = true;
-    for (double ix3 = minX3; ix3 <= maxX3; ix3 += delta)
-        for (double ix2 = minX2; ix2 <= maxX2; ix2 += delta)
-            for (double ix1 = minX1; ix1 <= maxX1; ix1 += delta)
+    for (real ix3 = minX3; ix3 <= maxX3; ix3 += delta)
+        for (real ix2 = minX2; ix2 <= maxX2; ix2 += delta)
+            for (real ix1 = minX1; ix1 <= maxX1; ix1 += delta)
                 result = result && this->geoObject3D->isPointInGbObject3D(ix1, ix2, ix3);
 
     return result;
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::arePointsOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                             double maxX3, double delta)
+bool Interactor3D::arePointsOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                             real maxX3, real delta)
 {
     bool result = true;
-    for (double ix3 = minX3; ix3 <= maxX3; ix3 += delta)
-        for (double ix2 = minX2; ix2 <= maxX2; ix2 += delta)
-            for (double ix1 = minX1; ix1 <= maxX1; ix1 += delta)
+    for (real ix3 = minX3; ix3 <= maxX3; ix3 += delta)
+        for (real ix2 = minX2; ix2 <= maxX2; ix2 += delta)
+            for (real ix1 = minX1; ix1 <= maxX1; ix1 += delta)
                 result = result && (!this->geoObject3D->isPointInGbObject3D(ix1, ix2, ix3));
 
     return result;
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::arePointsCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                             double maxX3, double delta)
+bool Interactor3D::arePointsCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                             real maxX3, real delta)
 {
     bool result = true;
-    for (double ix3 = minX3; ix3 <= maxX3; ix3 += delta)
-        for (double ix2 = minX2; ix2 <= maxX2; ix2 += delta)
-            for (double ix1 = minX1; ix1 <= maxX1; ix1 += delta)
+    for (real ix3 = minX3; ix3 <= maxX3; ix3 += delta)
+        for (real ix2 = minX2; ix2 <= maxX2; ix2 += delta)
+            for (real ix1 = minX1; ix1 <= maxX1; ix1 += delta)
                 result = result || this->geoObject3D->isPointInGbObject3D(ix1, ix2, ix3);
 
     return result;
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::isBlockOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                           double maxX3, double delta)
+bool Interactor3D::isBlockOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                           real maxX3, real delta)
 {
     switch (accuracy) {
             // simple duff
@@ -144,8 +144,8 @@ bool Interactor3D::isBlockOutsideGeoObject(double minX1, double minX2, double mi
     }
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::isBlockInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                          double maxX3, double delta)
+bool Interactor3D::isBlockInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                          real maxX3, real delta)
 {
     switch (accuracy) {
             // simple duff
@@ -184,8 +184,8 @@ bool Interactor3D::isBlockInsideGeoObject(double minX1, double minX2, double min
     }
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::isBlockCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                           double maxX3, double delta)
+bool Interactor3D::isBlockCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                           real maxX3, real delta)
 {
     switch (accuracy) {
             // simple duff
@@ -226,9 +226,9 @@ bool Interactor3D::isBlockCuttingGeoObject(double minX1, double minX2, double mi
 //////////////////////////////////////////////////////////////////////////
 void Interactor3D::setSolidBlock(SPtr<Block3D> block)
 {
-    double minX1, minX2, minX3, maxX1, maxX2, maxX3;
+    real minX1, minX2, minX3, maxX1, maxX2, maxX3;
 
-    double deltaX               = grid.lock()->getDeltaX(block);
+    real deltaX               = grid.lock()->getDeltaX(block);
     UbTupleDouble3 blockLengths = grid.lock()->getBlockLengths(block);
     UbTupleDouble3 org          = grid.lock()->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset   = grid.lock()->getNodeOffset(block);
@@ -257,9 +257,9 @@ void Interactor3D::setSolidBlock(SPtr<Block3D> block)
 //////////////////////////////////////////////////////////////////////////
 void Interactor3D::setBCBlock(SPtr<Block3D> block)
 {
-    double minX1, minX2, minX3, maxX1, maxX2, maxX3;
+    real minX1, minX2, minX3, maxX1, maxX2, maxX3;
 
-    double deltaX               = grid.lock()->getDeltaX(block);
+    real deltaX               = grid.lock()->getDeltaX(block);
     UbTupleDouble3 blockLengths = grid.lock()->getBlockLengths(block);
     UbTupleDouble3 org          = grid.lock()->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset   = grid.lock()->getNodeOffset(block);
@@ -298,7 +298,7 @@ void Interactor3D::updateBlocks()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void Interactor3D::updateInteractor(const double & /*timeStep*/)
+void Interactor3D::updateInteractor(const real & /*timeStep*/)
 {
     UB_THROW(UbException("Interactor3D::updateInteractor - toDo"));
 }
diff --git a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
index 9727bf636085c7c0d24a9108acc71925af36e5d1..3da0ebbca0bca020b5cd0b2cdd9e8acc8564466a 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
@@ -39,6 +39,7 @@
 
 #include "UbSystem.h"
 #include "UbTuple.h"
+#include "lbm/constants/D3Q27.h"
 
 class Block3D;
 class Grid3D;
@@ -57,8 +58,8 @@ public:
     Interactor3D(SPtr<GbObject3D> geoObject3D, SPtr<Grid3D> grid, int type, Interactor3D::Accuracy a);
 
     virtual ~Interactor3D();
-    virtual void initInteractor(const double &timestep = 0) = 0;
-    virtual void updateInteractor(const double &timestep = 0) = 0;
+    virtual void initInteractor(const real &timestep = 0) = 0;
+    virtual void updateInteractor(const real &timestep = 0) = 0;
 
     void setSolidBlock(SPtr<Block3D> block);
     void setBCBlock(SPtr<Block3D> block);
@@ -96,27 +97,27 @@ protected:
     //! detect that points are inside object
     //! \param min/max coordinates of bounding box
     //! \param delta is delta x
-    bool arePointsInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                  double delta);
+    bool arePointsInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                  real delta);
 
     //! detect that points aren't inside object
     //! \param min/max coordinates of bounding box
     //! \param delta is delta x
-    bool arePointsOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                   double delta);
+    bool arePointsOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                   real delta);
 
     //! detect that points are cutting object
     //! \param min/max coordinates of bounding box
     //! \param delta is delta x
-    bool arePointsCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                   double delta);
-
-    bool isBlockOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                 double delta);
-    bool isBlockInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                double delta);
-    bool isBlockCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                 double delta);
+    bool arePointsCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                   real delta);
+
+    bool isBlockOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                 real delta);
+    bool isBlockInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                real delta);
+    bool isBlockCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                 real delta);
 
     void updateBlocks();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
index 50a5339ea01d70ff0076ef81bda16db22e56ffe7..ffe11c011334da5fe8b3f7050ff817245f85e0a1 100644
--- a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
@@ -6,6 +6,7 @@
 #include "D3Q27System.h"
 #include "DataSet3D.h"
 #include "Block3D.h"
+#include "lbm/constants/NumericConstants.h"
 
 #define PROOF_CORRECTNESS
 
@@ -40,7 +41,9 @@ SPtr<LBMKernel> BGKLBMKernel::clone()
 void BGKLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+   using namespace vf::lbm::constant;
+   using namespace vf::lbm::dir;
 
     // initializing of forcing stuff
     if (withForcing) {
@@ -67,9 +70,9 @@ void BGKLBMKernel::calculate(int step)
         std::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions();
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
-    LBMReal drho, vx1, vx2, vx3;
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
+    real drho, vx1, vx2, vx3;
     const int bcArrayMaxX1 = (int)bcArray->getNX1();
     const int bcArrayMaxX2 = (int)bcArray->getNX2();
     const int bcArrayMaxX3 = (int)bcArray->getNX3();
@@ -135,7 +138,7 @@ void BGKLBMKernel::calculate(int step)
                     vx3 = f[DIR_00P] - f[DIR_00M] + f[DIR_P0P] - f[DIR_M0M] - f[DIR_P0M] + f[DIR_M0P] + f[DIR_0PP] - f[DIR_0MM] - f[DIR_0PM] + f[DIR_0MP] + f[DIR_PPP] +
                           f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] - f[DIR_PPM] - f[DIR_MMM] - f[DIR_PMM] - f[DIR_MPM];
 
-                    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+                    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
                     feq[DIR_000] = c8o27 * (drho - cu_sq);
                     feq[DIR_P00]    = c2o27 * (drho + 3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq);
@@ -244,10 +247,10 @@ void BGKLBMKernel::calculate(int step)
                     }
                     //////////////////////////////////////////////////////////////////////////
 #ifdef PROOF_CORRECTNESS
-                    LBMReal rho_post = f[DIR_000] + f[DIR_P00] + f[DIR_M00] + f[DIR_0P0] + f[DIR_0M0] + f[DIR_00P] + f[DIR_00M] + f[DIR_PP0] + f[DIR_MM0] + f[DIR_PM0] +
+                    real rho_post = f[DIR_000] + f[DIR_P00] + f[DIR_M00] + f[DIR_0P0] + f[DIR_0M0] + f[DIR_00P] + f[DIR_00M] + f[DIR_PP0] + f[DIR_MM0] + f[DIR_PM0] +
                                        f[DIR_MP0] + f[DIR_P0P] + f[DIR_M0M] + f[DIR_P0M] + f[DIR_M0P] + f[DIR_0PP] + f[DIR_0MM] + f[DIR_0PM] + f[DIR_0MP] + f[DIR_PPP] +
                                        f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] + f[DIR_PPM] + f[DIR_MMM] + f[DIR_PMM] + f[DIR_MPM];
-                    LBMReal dif = drho - rho_post;
+                    real dif = drho - rho_post;
 #ifdef SINGLEPRECISION
                     if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -263,35 +266,35 @@ void BGKLBMKernel::calculate(int step)
                     //////////////////////////////////////////////////////////////////////////
                     // write distribution
                     //////////////////////////////////////////////////////////////////////////
-                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)     = f[D3Q27System::INV_P00];
-                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)     = f[D3Q27System::INV_0P0];
-                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)     = f[D3Q27System::INV_00P];
-                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)    = f[D3Q27System::INV_PP0];
-                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)   = f[D3Q27System::INV_MP0];
-                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)    = f[D3Q27System::INV_P0P];
-                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)   = f[D3Q27System::INV_M0P];
-                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)    = f[D3Q27System::INV_0PP];
-                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)   = f[D3Q27System::INV_0MP];
-                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)   = f[D3Q27System::INV_PPP];
-                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)  = f[D3Q27System::INV_MPP];
-                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)  = f[D3Q27System::INV_PMP];
-                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[D3Q27System::INV_MMP];
+                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)     = f[INV_P00];
+                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)     = f[INV_0P0];
+                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)     = f[INV_00P];
+                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)    = f[INV_PP0];
+                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)   = f[INV_MP0];
+                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)    = f[INV_P0P];
+                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)   = f[INV_M0P];
+                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)    = f[INV_0PP];
+                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)   = f[INV_0MP];
+                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)   = f[INV_PPP];
+                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)  = f[INV_MPP];
+                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)  = f[INV_PMP];
+                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[INV_MMP];
 
-                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     = f[D3Q27System::INV_M00];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     = f[D3Q27System::INV_0M0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     = f[D3Q27System::INV_00M];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   = f[D3Q27System::INV_MM0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    = f[D3Q27System::INV_PM0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   = f[D3Q27System::INV_M0M];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    = f[D3Q27System::INV_P0M];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   = f[D3Q27System::INV_0MM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    = f[D3Q27System::INV_0PM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[D3Q27System::INV_MMM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  = f[D3Q27System::INV_PMM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  = f[D3Q27System::INV_MPM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   = f[D3Q27System::INV_PPM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     = f[INV_M00];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     = f[INV_0M0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     = f[INV_00M];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   = f[INV_MM0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    = f[INV_PM0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   = f[INV_M0M];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    = f[INV_P0M];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   = f[INV_0MM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    = f[INV_0PM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[INV_MMM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  = f[INV_PMM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  = f[INV_MPM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   = f[INV_PPM];
 
-                    (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+                    (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
                     //////////////////////////////////////////////////////////////////////////
                 }
             }
@@ -299,4 +302,4 @@ void BGKLBMKernel::calculate(int step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-double BGKLBMKernel::getCalculationTime() { return 0.0; }
+real BGKLBMKernel::getCalculationTime() { return 0.0; }
diff --git a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
index 9d17a8cc7677db7a142f4340dcdeaf38e268d214..099e9c093a6681c4c511a0fb02f9f023dafa3253 100644
--- a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
@@ -12,21 +12,21 @@ public:
     ~BGKLBMKernel() override;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override;
+    real getCalculationTime() override;
 
 private:
     void initDataSet();
     // void collideAllCompressible();
     // void collideAllIncompressible();
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
 
     mu::value_type muX1, muX2, muX3;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp
index b1e48abd9ed3c0a2b4bff26090c20512d94eff7f..45cc9651ca25dcd0b0904b753f6f1899a7a09d00 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp
@@ -9,7 +9,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CompressibleCumulant4thOrderViscosityLBMKernel::CompressibleCumulant4thOrderViscosityLBMKernel()
@@ -49,7 +50,7 @@ SPtr<LBMKernel> CompressibleCumulant4thOrderViscosityLBMKernel::clone()
    } 
    else
    {
-      OxxPyyPzz = one;
+      OxxPyyPzz = c1o1;
    }
 
    dynamicPointerCast<CompressibleCumulant4thOrderViscosityLBMKernel>(kernel)->OxxPyyPzz = this->OxxPyyPzz;
@@ -105,20 +106,20 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
    int maxX2 = bcArrayMaxX2-ghostLayerWidth;
    int maxX3 = bcArrayMaxX3-ghostLayerWidth;
 
-   LBMReal omega = collFactor;
+   real omega = collFactor;
    //LBMReal OxyyPxzz  = eight*(-two+omega)*(one+two*omega)/(-eight-fourteen*omega+seven*omega*omega);//one;
    //LBMReal OxyyMxzz  = eight*(-two+omega)*(-seven+four*omega)/(fiftysix-fifty*omega+nine*omega*omega);//one;
    //LBMReal Oxyz      = twentyfour*(-two+omega)*(-two-seven*omega+three*omega*omega)/(fourtyeight+c152*omega-c130*omega*omega+twentynine*omega*omega*omega);
-   LBMReal OxyyPxzz  = 8.0*(omega-2.0)*(OxxPyyPzz*(3.0*omega-1.0)-5.0*omega)/(8.0*(5.0-2.0*omega)*omega+OxxPyyPzz*(8.0+omega*(9.0*omega-26.0)));
-   LBMReal OxyyMxzz  = 8.0*(omega-2.0)*(omega+OxxPyyPzz*(3.0*omega-7.0))/(OxxPyyPzz*(56.0-42.0*omega+9.0*omega*omega)-8.0*omega);
-   LBMReal Oxyz      = 24.0*(omega-2.0)*(4.0*omega*omega+omega*OxxPyyPzz*(18.0-13.0*omega)+OxxPyyPzz*OxxPyyPzz*(2.0+omega*(6.0*omega-11.0)))/(16.0*omega*omega*(omega-6.0)-2.0*omega*OxxPyyPzz*(216.0+5.0*omega*(9.0*omega-46.0))+OxxPyyPzz*OxxPyyPzz*(omega*(3.0*omega-10.0)*(15.0*omega-28.0)-48.0));
+   real OxyyPxzz  = 8.0*(omega-2.0)*(OxxPyyPzz*(3.0*omega-1.0)-5.0*omega)/(8.0*(5.0-2.0*omega)*omega+OxxPyyPzz*(8.0+omega*(9.0*omega-26.0)));
+   real OxyyMxzz  = 8.0*(omega-2.0)*(omega+OxxPyyPzz*(3.0*omega-7.0))/(OxxPyyPzz*(56.0-42.0*omega+9.0*omega*omega)-8.0*omega);
+   real Oxyz      = 24.0*(omega-2.0)*(4.0*omega*omega+omega*OxxPyyPzz*(18.0-13.0*omega)+OxxPyyPzz*OxxPyyPzz*(2.0+omega*(6.0*omega-11.0)))/(16.0*omega*omega*(omega-6.0)-2.0*omega*OxxPyyPzz*(216.0+5.0*omega*(9.0*omega-46.0))+OxxPyyPzz*OxxPyyPzz*(omega*(3.0*omega-10.0)*(15.0*omega-28.0)-48.0));
 
    //LBMReal A = (four + two*omega - three*omega*omega) / (two - seven*omega + five*omega*omega);
    //LBMReal B = (four + twentyeight*omega - fourteen*omega*omega) / (six - twentyone*omega + fiveteen*omega*omega);
 
-   LBMReal A = (4.0*omega*omega+2.0*omega*OxxPyyPzz*(omega-6.0)+OxxPyyPzz*OxxPyyPzz*(omega*(10.0-3.0*omega)-4.0))/((omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
+   real A = (4.0*omega*omega+2.0*omega*OxxPyyPzz*(omega-6.0)+OxxPyyPzz*OxxPyyPzz*(omega*(10.0-3.0*omega)-4.0))/((omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
    //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-   LBMReal B = (4.0*omega*OxxPyyPzz*(9.0*omega-16.0)-4.0*omega*omega-2.0*OxxPyyPzz*OxxPyyPzz*(2.0+9.0*omega*(omega-2.0)))/(3.0*(omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
+   real B = (4.0*omega*OxxPyyPzz*(9.0*omega-16.0)-4.0*omega*omega-2.0*OxxPyyPzz*OxxPyyPzz*(2.0+9.0*omega*(omega-2.0)))/(3.0*(omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
 
    for (int x3 = minX3; x3 < maxX3; x3++)
    {
@@ -152,50 +153,50 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                // a b c
                //-1 0 1
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-               ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+               ////////////////////////////////////////////////////////////////////////////////////
+               real drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
                   (((mfbac+mfbca)+(mfbaa+mfbcc))+((mfabc+mfcba)+(mfaba+mfcbc))+((mfacb+mfcab)+(mfaab+mfccb)))+
                   ((mfabb+mfcbb)+(mfbab+mfbcb))+(mfbba+mfbbc))+mfbbb;
 
-               LBMReal rho = one+drho;
+               real rho = c1o1 +drho;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
+               real vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
                   (((mfcba-mfabc)+(mfcbc-mfaba))+((mfcab-mfacb)+(mfccb-mfaab)))+
                   (mfcbb-mfabb))/rho;
-               LBMReal vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
+               real vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
                   (((mfbca-mfbac)+(mfbcc-mfbaa))+((mfacb-mfcab)+(mfccb-mfaab)))+
                   (mfbcb-mfbab))/rho;
-               LBMReal vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
+               real vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
                   (((mfbac-mfbca)+(mfbcc-mfbaa))+((mfabc-mfcba)+(mfcbc-mfaba)))+
                   (mfbbc-mfbba))/rho;
                ////////////////////////////////////////////////////////////////////////////////////
@@ -204,9 +205,9 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////
                if (withForcing)
                {
-                  muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                  muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                  muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                  muX1 = static_cast<real>(x1-1+ix1*maxX1);
+                  muX2 = static_cast<real>(x2-1+ix2*maxX2);
+                  muX3 = static_cast<real>(x3-1+ix3*maxX3);
 
                   forcingX1 = muForcingX1.Eval();
                   forcingX2 = muForcingX2.Eval();
@@ -218,20 +219,20 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                }
                ///////////////////////////////////////////////////////////////////////////////////////////               
          ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal oMdrho = one; // comp special
+               real oMdrho = c1o1; // comp special
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal m0, m1, m2;
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real m0, m1, m2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx*vvx;
                vy2 = vvy*vvy;
                vz2 = vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimitP = 0.01;// * 0.0001f;
-               LBMReal qudricLimitM = 0.01;// * 0.0001f;
-               LBMReal qudricLimitD = 0.01;// * 0.001f;
+               real wadjust;
+               real qudricLimitP = 0.01;// * 0.0001f;
+               real qudricLimitM = 0.01;// * 0.0001f;
+               real qudricLimitD = 0.01;// * 0.001f;
                //LBMReal s9 = minusomega;
                //test
                //s9 = 0.;
@@ -247,7 +248,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o36 * oMdrho;
                mfaab = m1-m0 * vvz;
-               mfaac = m2-two*	m1 * vvz+vz2 * m0;
+               mfaac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfabc;
                m1 = mfabc-mfaba;
@@ -255,7 +256,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaba = m0;
                m0 += c1o9 * oMdrho;
                mfabb = m1-m0 * vvz;
-               mfabc = m2-two*	m1 * vvz+vz2 * m0;
+               mfabc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfacc;
                m1 = mfacc-mfaca;
@@ -263,7 +264,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o36 * oMdrho;
                mfacb = m1-m0 * vvz;
-               mfacc = m2-two*	m1 * vvz+vz2 * m0;
+               mfacc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbac;
@@ -272,7 +273,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c1o9 * oMdrho;
                mfbab = m1-m0 * vvz;
-               mfbac = m2-two*	m1 * vvz+vz2 * m0;
+               mfbac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbba+mfbbc;
                m1 = mfbbc-mfbba;
@@ -280,7 +281,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbba = m0;
                m0 += c4o9 * oMdrho;
                mfbbb = m1-m0 * vvz;
-               mfbbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbca+mfbcc;
                m1 = mfbcc-mfbca;
@@ -288,7 +289,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbca = m0;
                m0 += c1o9 * oMdrho;
                mfbcb = m1-m0 * vvz;
-               mfbcc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbcc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcac;
@@ -297,7 +298,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o36 * oMdrho;
                mfcab = m1-m0 * vvz;
-               mfcac = m2-two*	m1 * vvz+vz2 * m0;
+               mfcac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcba+mfcbc;
                m1 = mfcbc-mfcba;
@@ -305,7 +306,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcba = m0;
                m0 += c1o9 * oMdrho;
                mfcbb = m1-m0 * vvz;
-               mfcbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfcbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcca+mfccc;
                m1 = mfccc-mfcca;
@@ -313,7 +314,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcca = m0;
                m0 += c1o36 * oMdrho;
                mfccb = m1-m0 * vvz;
-               mfccc = m2-two*	m1 * vvz+vz2 * m0;
+               mfccc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -325,14 +326,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o6 * oMdrho;
                mfaba = m1-m0 * vvy;
-               mfaca = m2-two*	m1 * vvy+vy2 * m0;
+               mfaca = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfacb;
                m1 = mfacb-mfaab;
                m0 = m2+mfabb;
                mfaab = m0;
                mfabb = m1-m0 * vvy;
-               mfacb = m2-two*	m1 * vvy+vy2 * m0;
+               mfacb = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfacc;
                m1 = mfacc-mfaac;
@@ -340,7 +341,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o18 * oMdrho;
                mfabc = m1-m0 * vvy;
-               mfacc = m2-two*	m1 * vvy+vy2 * m0;
+               mfacc = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbca;
@@ -349,14 +350,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c2o3 * oMdrho;
                mfbba = m1-m0 * vvy;
-               mfbca = m2-two*	m1 * vvy+vy2 * m0;
+               mfbca = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbab+mfbcb;
                m1 = mfbcb-mfbab;
                m0 = m2+mfbbb;
                mfbab = m0;
                mfbbb = m1-m0 * vvy;
-               mfbcb = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcb = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbac+mfbcc;
                m1 = mfbcc-mfbac;
@@ -364,7 +365,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbac = m0;
                m0 += c2o9 * oMdrho;
                mfbbc = m1-m0 * vvy;
-               mfbcc = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcc = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcca;
@@ -373,14 +374,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o6 * oMdrho;
                mfcba = m1-m0 * vvy;
-               mfcca = m2-two*	m1 * vvy+vy2 * m0;
+               mfcca = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcab+mfccb;
                m1 = mfccb-mfcab;
                m0 = m2+mfcbb;
                mfcab = m0;
                mfcbb = m1-m0 * vvy;
-               mfccb = m2-two*	m1 * vvy+vy2 * m0;
+               mfccb = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcac+mfccc;
                m1 = mfccc-mfcac;
@@ -388,7 +389,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcac = m0;
                m0 += c1o18 * oMdrho;
                mfcbc = m1-m0 * vvy;
-               mfccc = m2-two*	m1 * vvy+vy2 * m0;
+               mfccc = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -398,16 +399,16 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                m1 = mfcaa-mfaaa;
                m0 = m2+mfbaa;
                mfaaa = m0;
-               m0 += one* oMdrho;
+               m0 += c1o1* oMdrho;
                mfbaa = m1-m0 * vvx;
-               mfcaa = m2-two*	m1 * vvx+vx2 * m0;
+               mfcaa = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfcba;
                m1 = mfcba-mfaba;
                m0 = m2+mfbba;
                mfaba = m0;
                mfbba = m1-m0 * vvx;
-               mfcba = m2-two*	m1 * vvx+vx2 * m0;
+               mfcba = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfcca;
                m1 = mfcca-mfaca;
@@ -415,7 +416,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o3 * oMdrho;
                mfbca = m1-m0 * vvx;
-               mfcca = m2-two*	m1 * vvx+vx2 * m0;
+               mfcca = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfcab;
@@ -423,21 +424,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                m0 = m2+mfbab;
                mfaab = m0;
                mfbab = m1-m0 * vvx;
-               mfcab = m2-two*	m1 * vvx+vx2 * m0;
+               mfcab = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabb+mfcbb;
                m1 = mfcbb-mfabb;
                m0 = m2+mfbbb;
                mfabb = m0;
                mfbbb = m1-m0 * vvx;
-               mfcbb = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbb = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacb+mfccb;
                m1 = mfccb-mfacb;
                m0 = m2+mfbcb;
                mfacb = m0;
                mfbcb = m1-m0 * vvx;
-               mfccb = m2-two*	m1 * vvx+vx2 * m0;
+               mfccb = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfcac;
@@ -446,14 +447,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o3 * oMdrho;
                mfbac = m1-m0 * vvx;
-               mfcac = m2-two*	m1 * vvx+vx2 * m0;
+               mfcac = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabc+mfcbc;
                m1 = mfcbc-mfabc;
                m0 = m2+mfbbc;
                mfabc = m0;
                mfbbc = m1-m0 * vvx;
-               mfcbc = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacc+mfccc;
                m1 = mfccc-mfacc;
@@ -461,7 +462,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfacc = m0;
                m0 += c1o9 * oMdrho;
                mfbcc = m1-m0 * vvx;
-               mfccc = m2-two*	m1 * vvx+vx2 * m0;
+               mfccc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
 
@@ -505,47 +506,47 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////
                //4.
                //////////////////////////////
-               LBMReal O4 = one;
+               real O4 = c1o1;
                //////////////////////////////
-               //LBMReal O4        = omega;//TRT
+               //real O4        = omega;//TRT
                ////////////////////////////////////////////////////////////
                //5.
                //////////////////////////////
-               LBMReal O5 = one;
+               real O5 = c1o1;
                ////////////////////////////////////////////////////////////
                //6.
                //////////////////////////////
-               LBMReal O6 = one;
+               real O6 = c1o1;
                ////////////////////////////////////////////////////////////
 
 
                //central moments to cumulants
                //4.
-               LBMReal CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
-               LBMReal CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
-               LBMReal CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
+               real CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
 
-               LBMReal CUMcca = mfcca-(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
-               LBMReal CUMcac = mfcac-(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
-               LBMReal CUMacc = mfacc-(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcca = mfcca-(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcac = mfcac-(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
+               real CUMacc = mfacc-(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
 
                //5.
-               LBMReal CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               LBMReal CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               LBMReal CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               real CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               real CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               real CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               LBMReal CUMccc = mfccc+((-four *  mfbbb * mfbbb
+               real CUMccc = mfccc+((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -555,9 +556,9 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
 
    //2.
    // linear combinations
-               LBMReal mxxPyyPzz = mfcaa+mfaca+mfaac;
-               LBMReal mxxMyy = mfcaa-mfaca;
-               LBMReal mxxMzz = mfcaa-mfaac;
+               real mxxPyyPzz = mfcaa+mfaca+mfaac;
+               real mxxMyy = mfcaa-mfaca;
+               real mxxMzz = mfcaa-mfaac;
 
                //////////////////////////////////////////////////////////////////////////
       // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -597,24 +598,24 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
 
-               LBMReal dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
-               LBMReal dyuy = dxux+omega * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux+omega * c3o2 * mxxMzz;
+               real dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
+               real dyuy = dxux+omega * c3o2 * mxxMyy;
+               real dzuz = dxux+omega * c3o2 * mxxMzz;
 
-               LBMReal Dxy =-three*omega*mfbba;
-               LBMReal Dxz =-three*omega*mfbab;
-               LBMReal Dyz =-three*omega*mfabb;
+               real Dxy =-c3o1 *omega*mfbba;
+               real Dxz =-c3o1 *omega*mfbab;
+               real Dyz =-c3o1 *omega*mfabb;
 
 
 
                //relax
 
-               wadjust = OxxPyyPzz+(one-OxxPyyPzz)*fabs((mfaaa-mxxPyyPzz))/(fabs((mfaaa-mxxPyyPzz))+qudricLimitD);
-               mxxPyyPzz += wadjust*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);
+               wadjust = OxxPyyPzz+(c1o1 -OxxPyyPzz)*fabs((mfaaa-mxxPyyPzz))/(fabs((mfaaa-mxxPyyPzz))+qudricLimitD);
+               mxxPyyPzz += wadjust*(mfaaa-mxxPyyPzz)- c3o1 * (c1o1 -c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);
 
               // mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-               mxxMyy += omega * (-mxxMyy)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
-               mxxMzz += omega * (-mxxMzz)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
+               mxxMyy += omega * (-mxxMyy)-c3o1 * (c1o1 +c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
+               mxxMzz += omega * (-mxxMzz)-c3o1 * (c1o1 +c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
 
                //////////////////////////////////////////////////////////////////////////
                //limiter-Scheise Teil 2
@@ -644,37 +645,37 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
 
                // linear combinations back
                mfcaa = c1o3 * (mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaca = c1o3 * (-two*  mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaac = c1o3 * (mxxMyy-two* mxxMzz+mxxPyyPzz);
+               mfaca = c1o3 * (-c2o1 *  mxxMyy+mxxMzz+mxxPyyPzz);
+               mfaac = c1o3 * (mxxMyy-c2o1 * mxxMzz+mxxPyyPzz);
 
                //3.
                // linear combinations
 
-               LBMReal mxxyPyzz = mfcba+mfabc;
-               LBMReal mxxyMyzz = mfcba-mfabc;
+               real mxxyPyzz = mfcba+mfabc;
+               real mxxyMyzz = mfcba-mfabc;
 
-               LBMReal mxxzPyyz = mfcab+mfacb;
-               LBMReal mxxzMyyz = mfcab-mfacb;
+               real mxxzPyyz = mfcab+mfacb;
+               real mxxzMyyz = mfcab-mfacb;
 
-               LBMReal mxyyPxzz = mfbca+mfbac;
-               LBMReal mxyyMxzz = mfbca-mfbac;
+               real mxyyPxzz = mfbca+mfbac;
+               real mxyyMxzz = mfbca-mfbac;
 
                //relax
                //////////////////////////////////////////////////////////////////////////
                //das ist der limiter
-               wadjust = Oxyz+(one-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
+               wadjust = Oxyz+(c1o1-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
                mfbbb += wadjust * (-mfbbb);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
                mxxyPyzz += wadjust * (-mxxyPyzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
                mxxyMyzz += wadjust * (-mxxyMyzz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
                mxxzPyyz += wadjust * (-mxxzPyyz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
                mxxzMyyz += wadjust * (-mxxzMyyz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
                mxyyPxzz += wadjust * (-mxyyPxzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
                mxyyMxzz += wadjust * (-mxyyMxzz);
                //////////////////////////////////////////////////////////////////////////
                //ohne limiter
@@ -725,12 +726,12 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                //CUMbbc += O4 * (-CUMbbc);
                //CUMbcb += O4 * (-CUMbcb);
                //CUMcbb += O4 * (-CUMcbb);
-               CUMacc = -O4*(one / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-               CUMcac = -O4*(one / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-               CUMcca = -O4*(one / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-               CUMbbc = -O4*(one / omega - c1o2) * Dxy           * c1o3 * B + (one - O4) * (CUMbbc);
-               CUMbcb = -O4*(one / omega - c1o2) * Dxz           * c1o3 * B + (one - O4) * (CUMbcb);
-               CUMcbb = -O4*(one / omega - c1o2) * Dyz           * c1o3 * B + (one - O4) * (CUMcbb);
+               CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+               CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+               CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+               CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy           * c1o3 * B + (c1o1 - O4) * (CUMbbc);
+               CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz           * c1o3 * B + (c1o1 - O4) * (CUMbcb);
+               CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz           * c1o3 * B + (c1o1 - O4) * (CUMcbb);
                //////////////////////////////////////////////////////////////////////////
 
 
@@ -746,31 +747,31 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
 
                //back cumulants to central moments
                //4.
-               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;
-               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho;
-               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho;
+               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;
+               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho;
+               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho;
 
-               mfcca = CUMcca+(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfcac = CUMcac+(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfacc = CUMacc+(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcca = CUMcca+(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcac = CUMcac+(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfacc = CUMacc+(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
 
                //5.
-               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               mfccc = CUMccc-((-four *  mfbbb * mfbbb
+               mfccc = CUMccc-((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                ////////////////////////////////////////////////////////////////////////////////////
@@ -786,22 +787,22 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
          //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
          ////////////////////////////////////////////////////////////////////////////////////
          // Z - Dir
-               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+one* oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfaac-two* mfaab *  vvz+mfaaa                * (one-vz2)-one* oMdrho * vz2;
-               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+one* oMdrho) * (vz2+vvz) * c1o2;
+               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2-vvz) * c1o2;
+               m1 = -mfaac-c2o1* mfaab *  vvz+mfaaa                * (c1o1-vz2)- c1o1 * oMdrho * vz2;
+               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2+vvz) * c1o2;
                mfaaa = m0;
                mfaab = m1;
                mfaac = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfabc * c1o2+mfabb * (vvz-c1o2)+mfaba * (vz2-vvz) * c1o2;
-               m1 = -mfabc-two* mfabb *  vvz+mfaba * (one-vz2);
+               m1 = -mfabc-c2o1* mfabb *  vvz+mfaba * (c1o1-vz2);
                m2 = mfabc * c1o2+mfabb * (vvz+c1o2)+mfaba * (vz2+vvz) * c1o2;
                mfaba = m0;
                mfabb = m1;
                mfabc = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfacb * (vvz-c1o2)+(mfaca+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfacc-two* mfacb *  vvz+mfaca                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfacc-c2o1* mfacb *  vvz+mfaca                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                m2 = mfacc * c1o2+mfacb * (vvz+c1o2)+(mfaca+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfaca = m0;
                mfacb = m1;
@@ -809,21 +810,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbac * c1o2+mfbab * (vvz-c1o2)+mfbaa * (vz2-vvz) * c1o2;
-               m1 = -mfbac-two* mfbab *  vvz+mfbaa * (one-vz2);
+               m1 = -mfbac-c2o1* mfbab *  vvz+mfbaa * (c1o1-vz2);
                m2 = mfbac * c1o2+mfbab * (vvz+c1o2)+mfbaa * (vz2+vvz) * c1o2;
                mfbaa = m0;
                mfbab = m1;
                mfbac = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbbc * c1o2+mfbbb * (vvz-c1o2)+mfbba * (vz2-vvz) * c1o2;
-               m1 = -mfbbc-two* mfbbb *  vvz+mfbba * (one-vz2);
+               m1 = -mfbbc-c2o1* mfbbb *  vvz+mfbba * (c1o1-vz2);
                m2 = mfbbc * c1o2+mfbbb * (vvz+c1o2)+mfbba * (vz2+vvz) * c1o2;
                mfbba = m0;
                mfbbb = m1;
                mfbbc = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbcb * (vvz-c1o2)+mfbca * (vz2-vvz) * c1o2;
-               m1 = -mfbcc-two* mfbcb *  vvz+mfbca * (one-vz2);
+               m1 = -mfbcc-c2o1* mfbcb *  vvz+mfbca * (c1o1-vz2);
                m2 = mfbcc * c1o2+mfbcb * (vvz+c1o2)+mfbca * (vz2+vvz) * c1o2;
                mfbca = m0;
                mfbcb = m1;
@@ -831,21 +832,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfcab * (vvz-c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfcac-two* mfcab *  vvz+mfcaa                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfcac- c2o1* mfcab *  vvz+mfcaa                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                m2 = mfcac * c1o2+mfcab * (vvz+c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfcaa = m0;
                mfcab = m1;
                mfcac = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfcbb * (vvz-c1o2)+mfcba * (vz2-vvz) * c1o2;
-               m1 = -mfcbc-two* mfcbb *  vvz+mfcba * (one-vz2);
+               m1 = -mfcbc-c2o1* mfcbb *  vvz+mfcba * (c1o1-vz2);
                m2 = mfcbc * c1o2+mfcbb * (vvz+c1o2)+mfcba * (vz2+vvz) * c1o2;
                mfcba = m0;
                mfcbb = m1;
                mfcbc = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfccb * (vvz-c1o2)+(mfcca+c1o9 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfccc-two* mfccb *  vvz+mfcca                  * (one-vz2)-c1o9 * oMdrho * vz2;
+               m1 = -mfccc-c2o1* mfccb *  vvz+mfcca                  * (c1o1-vz2)-c1o9 * oMdrho * vz2;
                m2 = mfccc * c1o2+mfccb * (vvz+c1o2)+(mfcca+c1o9 * oMdrho) * (vz2+vvz) * c1o2;
                mfcca = m0;
                mfccb = m1;
@@ -856,21 +857,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Y - Dir
                m0 = mfaca * c1o2+mfaba * (vvy-c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfaca-two* mfaba *  vvy+mfaaa                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfaca-c2o1* mfaba *  vvy+mfaaa                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                m2 = mfaca * c1o2+mfaba * (vvy+c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaaa = m0;
                mfaba = m1;
                mfaca = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacb * c1o2+mfabb * (vvy-c1o2)+(mfaab+c2o3 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacb-two* mfabb *  vvy+mfaab                  * (one-vy2)-c2o3 * oMdrho * vy2;
+               m1 = -mfacb-c2o1* mfabb *  vvy+mfaab                  * (c1o1-vy2)-c2o3 * oMdrho * vy2;
                m2 = mfacb * c1o2+mfabb * (vvy+c1o2)+(mfaab+c2o3 * oMdrho) * (vy2+vvy) * c1o2;
                mfaab = m0;
                mfabb = m1;
                mfacb = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfabc * (vvy-c1o2)+(mfaac+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacc-two* mfabc *  vvy+mfaac                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfacc-c2o1* mfabc *  vvy+mfaac                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                m2 = mfacc * c1o2+mfabc * (vvy+c1o2)+(mfaac+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaac = m0;
                mfabc = m1;
@@ -878,21 +879,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbca * c1o2+mfbba * (vvy-c1o2)+mfbaa * (vy2-vvy) * c1o2;
-               m1 = -mfbca-two* mfbba *  vvy+mfbaa * (one-vy2);
+               m1 = -mfbca-c2o1* mfbba *  vvy+mfbaa * (c1o1-vy2);
                m2 = mfbca * c1o2+mfbba * (vvy+c1o2)+mfbaa * (vy2+vvy) * c1o2;
                mfbaa = m0;
                mfbba = m1;
                mfbca = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcb * c1o2+mfbbb * (vvy-c1o2)+mfbab * (vy2-vvy) * c1o2;
-               m1 = -mfbcb-two* mfbbb *  vvy+mfbab * (one-vy2);
+               m1 = -mfbcb-c2o1* mfbbb *  vvy+mfbab * (c1o1-vy2);
                m2 = mfbcb * c1o2+mfbbb * (vvy+c1o2)+mfbab * (vy2+vvy) * c1o2;
                mfbab = m0;
                mfbbb = m1;
                mfbcb = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbbc * (vvy-c1o2)+mfbac * (vy2-vvy) * c1o2;
-               m1 = -mfbcc-two* mfbbc *  vvy+mfbac * (one-vy2);
+               m1 = -mfbcc-c2o1* mfbbc *  vvy+mfbac * (c1o1-vy2);
                m2 = mfbcc * c1o2+mfbbc * (vvy+c1o2)+mfbac * (vy2+vvy) * c1o2;
                mfbac = m0;
                mfbbc = m1;
@@ -900,21 +901,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfcba * (vvy-c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfcca-two* mfcba *  vvy+mfcaa                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfcca-c2o1* mfcba *  vvy+mfcaa                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                m2 = mfcca * c1o2+mfcba * (vvy+c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcaa = m0;
                mfcba = m1;
                mfcca = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfcbb * (vvy-c1o2)+(mfcab+c2o9 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccb-two* mfcbb *  vvy+mfcab                  * (one-vy2)-c2o9 * oMdrho * vy2;
+               m1 = -mfccb-c2o1* mfcbb *  vvy+mfcab                  * (c1o1-vy2)-c2o9 * oMdrho * vy2;
                m2 = mfccb * c1o2+mfcbb * (vvy+c1o2)+(mfcab+c2o9 * oMdrho) * (vy2+vvy) * c1o2;
                mfcab = m0;
                mfcbb = m1;
                mfccb = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfcbc * (vvy-c1o2)+(mfcac+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccc-two* mfcbc *  vvy+mfcac                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfccc-c2o1* mfcbc *  vvy+mfcac                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                m2 = mfccc * c1o2+mfcbc * (vvy+c1o2)+(mfcac+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcac = m0;
                mfcbc = m1;
@@ -925,21 +926,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // X - Dir
                m0 = mfcaa * c1o2+mfbaa * (vvx-c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcaa-two* mfbaa *  vvx+mfaaa                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcaa-c2o1* mfbaa *  vvx+mfaaa                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcaa * c1o2+mfbaa * (vvx+c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaaa = m0;
                mfbaa = m1;
                mfcaa = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcba * c1o2+mfbba * (vvx-c1o2)+(mfaba+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcba-two* mfbba *  vvx+mfaba                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcba-c2o1* mfbba *  vvx+mfaba                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcba * c1o2+mfbba * (vvx+c1o2)+(mfaba+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaba = m0;
                mfbba = m1;
                mfcba = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfbca * (vvx-c1o2)+(mfaca+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcca-two* mfbca *  vvx+mfaca                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcca-c2o1* mfbca *  vvx+mfaca                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcca * c1o2+mfbca * (vvx+c1o2)+(mfaca+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaca = m0;
                mfbca = m1;
@@ -947,21 +948,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcab * c1o2+mfbab * (vvx-c1o2)+(mfaab+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcab-two* mfbab *  vvx+mfaab                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcab-c2o1* mfbab *  vvx+mfaab                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcab * c1o2+mfbab * (vvx+c1o2)+(mfaab+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaab = m0;
                mfbab = m1;
                mfcab = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfcbb * c1o2+mfbbb * (vvx-c1o2)+(mfabb+c4o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbb-two* mfbbb *  vvx+mfabb                  * (one-vx2)-c4o9 * oMdrho * vx2;
+               m1 = -mfcbb-c2o1* mfbbb *  vvx+mfabb                  * (c1o1-vx2)-c4o9 * oMdrho * vx2;
                m2 = mfcbb * c1o2+mfbbb * (vvx+c1o2)+(mfabb+c4o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabb = m0;
                mfbbb = m1;
                mfcbb = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfbcb * (vvx-c1o2)+(mfacb+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccb-two* mfbcb *  vvx+mfacb                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfccb-c2o1* mfbcb *  vvx+mfacb                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfccb * c1o2+mfbcb * (vvx+c1o2)+(mfacb+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfacb = m0;
                mfbcb = m1;
@@ -969,21 +970,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfbac * (vvx-c1o2)+(mfaac+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcac-two* mfbac *  vvx+mfaac                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcac-c2o1* mfbac *  vvx+mfaac                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcac * c1o2+mfbac * (vvx+c1o2)+(mfaac+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaac = m0;
                mfbac = m1;
                mfcac = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfbbc * (vvx-c1o2)+(mfabc+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbc-two* mfbbc *  vvx+mfabc                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcbc-c2o1* mfbbc *  vvx+mfabc                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcbc * c1o2+mfbbc * (vvx+c1o2)+(mfabc+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabc = m0;
                mfbbc = m1;
                mfcbc = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfbcc * (vvx-c1o2)+(mfacc+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccc-two* mfbcc *  vvx+mfacc                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfccc-c2o1* mfbcc *  vvx+mfacc                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfccc * c1o2+mfbcc * (vvx+c1o2)+(mfacc+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfacc = m0;
                mfbcc = m1;
@@ -994,11 +995,11 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
                //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = drho - drho_post;
+               real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1052,13 +1053,13 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double CompressibleCumulant4thOrderViscosityLBMKernel::getCalculationTime()
+real CompressibleCumulant4thOrderViscosityLBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleCumulant4thOrderViscosityLBMKernel::setBulkViscosity(LBMReal value)
+void CompressibleCumulant4thOrderViscosityLBMKernel::setBulkViscosity(real value)
 {
    bulkViscosity = value;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h
index 2cdd9c5b32b0068b5e586a7033a2456f72167d31..fc3e0dffb6abf836995aaecc95f07fc4fcaf1d64 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h
@@ -21,29 +21,29 @@ public:
    ~CompressibleCumulant4thOrderViscosityLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    //! The value should not be equal to a shear viscosity
-   void setBulkViscosity(LBMReal value);
+   void setBulkViscosity(real value);
 protected:
    virtual void initDataSet();
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
    
    // bulk viscosity
-   LBMReal OxxPyyPzz; //omega2 (bulk viscosity)
-   LBMReal bulkViscosity;
+   real OxxPyyPzz; //omega2 (bulk viscosity)
+   real bulkViscosity;
 
 };
 #endif // CompressibleCumulant4thOrderViscosityLBMKernel_h__
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp
index 5542d2c9851e362e012a2950600a5225441f6644..2552de70437992a5cbf79a13f40615e46b084fb6 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp
@@ -7,7 +7,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CompressibleCumulantLBMKernel::CompressibleCumulantLBMKernel()
@@ -59,7 +60,7 @@ SPtr<LBMKernel> CompressibleCumulantLBMKernel::clone()
    }
    else
    {
-      dynamicPointerCast<CompressibleCumulantLBMKernel>(kernel)->OxxPyyPzz = one;
+      dynamicPointerCast<CompressibleCumulantLBMKernel>(kernel)->OxxPyyPzz = c1o1;
    }
    return kernel;
 }
@@ -113,7 +114,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
    int maxX2 = bcArrayMaxX2-ghostLayerWidth;
    int maxX3 = bcArrayMaxX3-ghostLayerWidth;
 
-   LBMReal omega = collFactor;
+   real omega = collFactor;
 
 
    //#pragma omp parallel num_threads(8)
@@ -154,50 +155,50 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   // a b c
                   //-1 0 1
 
-                  LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-                  LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-                  LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-                  LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-                  LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-                  LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-                  LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-                  LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-                  LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-                  LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-                  LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-                  LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-                  LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-                  LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-                  LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-                  LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-                  LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-                  LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-                  LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-                  LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-                  LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-                  LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-                  LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                  LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                  LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                  LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                  LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-                  ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
+                  real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                  real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                  real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                  real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                  real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                  real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                  real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                  real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                  real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                  real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                  real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                  real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                  real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                  real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                  real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                  real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                  real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                  real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                  real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                  real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                  real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                  real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                  real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                  real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                  real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                  real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                  real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  real drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
                      (((mfbac+mfbca)+(mfbaa+mfbcc))+((mfabc+mfcba)+(mfaba+mfcbc))+((mfacb+mfcab)+(mfaab+mfccb)))+
                      ((mfabb+mfcbb)+(mfbab+mfbcb))+(mfbba+mfbbc))+mfbbb;
 
-                  LBMReal rho = one+drho;
+                  real rho = c1o1+drho;
                   ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
+                  real vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
                      (((mfcba-mfabc)+(mfcbc-mfaba))+((mfcab-mfacb)+(mfccb-mfaab)))+
                      (mfcbb-mfabb))/rho;
-                  LBMReal vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
+                  real vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
                      (((mfbca-mfbac)+(mfbcc-mfbaa))+((mfacb-mfcab)+(mfccb-mfaab)))+
                      (mfbcb-mfbab))/rho;
-                  LBMReal vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
+                  real vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
                      (((mfbac-mfbca)+(mfbcc-mfbaa))+((mfabc-mfcba)+(mfcbc-mfaba)))+
                      (mfbbc-mfbba))/rho;
                   ////////////////////////////////////////////////////////////////////////////////////
@@ -206,9 +207,9 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ///////////////////////////////////////////////////////////////////////////////////////////
                   if (withForcing)
                   {
-                     muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                     muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                     muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                     muX1 = static_cast<mu::value_type>(x1-1+ix1*maxX1); // parser variables are mu::value_type, not real:
+                     muX2 = static_cast<mu::value_type>(x2-1+ix2*maxX2); // casting to the variable's own type avoids an
+                     muX3 = static_cast<mu::value_type>(x3-1+ix3*maxX3); // int -> float -> double round trip when real is float
 
                      forcingX1 = muForcingX1.Eval();
                      forcingX2 = muForcingX2.Eval();
@@ -220,12 +221,12 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   }
                   ///////////////////////////////////////////////////////////////////////////////////////////               
             ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal oMdrho = one; // comp special
+                  real oMdrho = c1o1; // comp special
                   ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal m0, m1, m2;
-                  LBMReal vx2;
-                  LBMReal vy2;
-                  LBMReal vz2;
+                  real m0, m1, m2;
+                  real vx2;
+                  real vy2;
+                  real vz2;
                   vx2 = vvx*vvx;
                   vy2 = vvy*vvy;
                   vz2 = vvz*vvz;
@@ -249,7 +250,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaaa = m0;
                   m0 += c1o36 * oMdrho;
                   mfaab = m1-m0 * vvz;
-                  mfaac = m2-two*	m1 * vvz+vz2 * m0;
+                  mfaac = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaba+mfabc;
                   m1 = mfabc-mfaba;
@@ -257,7 +258,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaba = m0;
                   m0 += c1o9 * oMdrho;
                   mfabb = m1-m0 * vvz;
-                  mfabc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfabc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaca+mfacc;
                   m1 = mfacc-mfaca;
@@ -265,7 +266,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaca = m0;
                   m0 += c1o36 * oMdrho;
                   mfacb = m1-m0 * vvz;
-                  mfacc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfacc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbaa+mfbac;
@@ -274,7 +275,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbaa = m0;
                   m0 += c1o9 * oMdrho;
                   mfbab = m1-m0 * vvz;
-                  mfbac = m2-two*	m1 * vvz+vz2 * m0;
+                  mfbac = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbba+mfbbc;
                   m1 = mfbbc-mfbba;
@@ -282,7 +283,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbba = m0;
                   m0 += c4o9 * oMdrho;
                   mfbbb = m1-m0 * vvz;
-                  mfbbc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfbbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbca+mfbcc;
                   m1 = mfbcc-mfbca;
@@ -290,7 +291,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbca = m0;
                   m0 += c1o9 * oMdrho;
                   mfbcb = m1-m0 * vvz;
-                  mfbcc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfbcc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcaa+mfcac;
@@ -299,7 +300,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcaa = m0;
                   m0 += c1o36 * oMdrho;
                   mfcab = m1-m0 * vvz;
-                  mfcac = m2-two*	m1 * vvz+vz2 * m0;
+                  mfcac = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcba+mfcbc;
                   m1 = mfcbc-mfcba;
@@ -307,7 +308,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcba = m0;
                   m0 += c1o9 * oMdrho;
                   mfcbb = m1-m0 * vvz;
-                  mfcbc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfcbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcca+mfccc;
                   m1 = mfccc-mfcca;
@@ -315,7 +316,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcca = m0;
                   m0 += c1o36 * oMdrho;
                   mfccb = m1-m0 * vvz;
-                  mfccc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfccc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -327,14 +328,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaaa = m0;
                   m0 += c1o6 * oMdrho;
                   mfaba = m1-m0 * vvy;
-                  mfaca = m2-two*	m1 * vvy+vy2 * m0;
+                  mfaca = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaab+mfacb;
                   m1 = mfacb-mfaab;
                   m0 = m2+mfabb;
                   mfaab = m0;
                   mfabb = m1-m0 * vvy;
-                  mfacb = m2-two*	m1 * vvy+vy2 * m0;
+                  mfacb = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaac+mfacc;
                   m1 = mfacc-mfaac;
@@ -342,7 +343,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaac = m0;
                   m0 += c1o18 * oMdrho;
                   mfabc = m1-m0 * vvy;
-                  mfacc = m2-two*	m1 * vvy+vy2 * m0;
+                  mfacc = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbaa+mfbca;
@@ -351,14 +352,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbaa = m0;
                   m0 += c2o3 * oMdrho;
                   mfbba = m1-m0 * vvy;
-                  mfbca = m2-two*	m1 * vvy+vy2 * m0;
+                  mfbca = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbab+mfbcb;
                   m1 = mfbcb-mfbab;
                   m0 = m2+mfbbb;
                   mfbab = m0;
                   mfbbb = m1-m0 * vvy;
-                  mfbcb = m2-two*	m1 * vvy+vy2 * m0;
+                  mfbcb = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbac+mfbcc;
                   m1 = mfbcc-mfbac;
@@ -366,7 +367,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbac = m0;
                   m0 += c2o9 * oMdrho;
                   mfbbc = m1-m0 * vvy;
-                  mfbcc = m2-two*	m1 * vvy+vy2 * m0;
+                  mfbcc = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcaa+mfcca;
@@ -375,14 +376,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcaa = m0;
                   m0 += c1o6 * oMdrho;
                   mfcba = m1-m0 * vvy;
-                  mfcca = m2-two*	m1 * vvy+vy2 * m0;
+                  mfcca = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcab+mfccb;
                   m1 = mfccb-mfcab;
                   m0 = m2+mfcbb;
                   mfcab = m0;
                   mfcbb = m1-m0 * vvy;
-                  mfccb = m2-two*	m1 * vvy+vy2 * m0;
+                  mfccb = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcac+mfccc;
                   m1 = mfccc-mfcac;
@@ -390,7 +391,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcac = m0;
                   m0 += c1o18 * oMdrho;
                   mfcbc = m1-m0 * vvy;
-                  mfccc = m2-two*	m1 * vvy+vy2 * m0;
+                  mfccc = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -400,16 +401,16 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   m1 = mfcaa-mfaaa;
                   m0 = m2+mfbaa;
                   mfaaa = m0;
-                  m0 += one* oMdrho;
+                  m0 += c1o1* oMdrho;
                   mfbaa = m1-m0 * vvx;
-                  mfcaa = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcaa = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaba+mfcba;
                   m1 = mfcba-mfaba;
                   m0 = m2+mfbba;
                   mfaba = m0;
                   mfbba = m1-m0 * vvx;
-                  mfcba = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcba = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaca+mfcca;
                   m1 = mfcca-mfaca;
@@ -417,7 +418,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaca = m0;
                   m0 += c1o3 * oMdrho;
                   mfbca = m1-m0 * vvx;
-                  mfcca = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcca = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaab+mfcab;
@@ -425,21 +426,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   m0 = m2+mfbab;
                   mfaab = m0;
                   mfbab = m1-m0 * vvx;
-                  mfcab = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcab = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfabb+mfcbb;
                   m1 = mfcbb-mfabb;
                   m0 = m2+mfbbb;
                   mfabb = m0;
                   mfbbb = m1-m0 * vvx;
-                  mfcbb = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcbb = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfacb+mfccb;
                   m1 = mfccb-mfacb;
                   m0 = m2+mfbcb;
                   mfacb = m0;
                   mfbcb = m1-m0 * vvx;
-                  mfccb = m2-two*	m1 * vvx+vx2 * m0;
+                  mfccb = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaac+mfcac;
@@ -448,14 +449,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaac = m0;
                   m0 += c1o3 * oMdrho;
                   mfbac = m1-m0 * vvx;
-                  mfcac = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcac = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfabc+mfcbc;
                   m1 = mfcbc-mfabc;
                   m0 = m2+mfbbc;
                   mfabc = m0;
                   mfbbc = m1-m0 * vvx;
-                  mfcbc = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcbc = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfacc+mfccc;
                   m1 = mfccc-mfacc;
@@ -463,7 +464,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfacc = m0;
                   m0 += c1o9 * oMdrho;
                   mfbcc = m1-m0 * vvx;
-                  mfccc = m2-two*	m1 * vvx+vx2 * m0;
+                  mfccc = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
 
@@ -477,7 +478,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////
                   //3.
                   //////////////////////////////
-                  LBMReal OxyyPxzz = one;//three  * (two - omega) / (three  - omega);//
+                  real OxyyPxzz = c1o1;//three  * (two - omega) / (three  - omega);//
                   //LBMReal OxyyMxzz = one;//six    * (two - omega) / (six    - omega);//
 //                  LBMReal Oxyz = one;//twelve * (two - omega) / (twelve + omega);//
                   //////////////////////////////
@@ -501,47 +502,47 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////
                   //4.
                   //////////////////////////////
-                  LBMReal O4 = one;
+                  real O4 = c1o1;
                   //////////////////////////////
-                  //LBMReal O4        = omega;//TRT
+                  //real O4        = omega;//TRT
                   ////////////////////////////////////////////////////////////
                   //5.
                   //////////////////////////////
-                  LBMReal O5 = one;
+                  real O5 = c1o1;
                   ////////////////////////////////////////////////////////////
                   //6.
                   //////////////////////////////
-                  LBMReal O6 = one;
+                  real O6 = c1o1;
                   ////////////////////////////////////////////////////////////
 
 
                   //central moments to cumulants
                   //4.
-                  LBMReal CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
-                  LBMReal CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
-                  LBMReal CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
+                  real CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
+                  real CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
+                  real CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
 
-                  LBMReal CUMcca = mfcca-(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
-                  LBMReal CUMcac = mfcac-(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
-                  LBMReal CUMacc = mfacc-(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
+                  real CUMcca = mfcca-(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
+                  real CUMcac = mfcac-(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
+                  real CUMacc = mfacc-(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
 
                   //5.
-                  LBMReal CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-                  LBMReal CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-                  LBMReal CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+                  real CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+                  real CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+                  real CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                   //6.
 
-                  LBMReal CUMccc = mfccc+((-four *  mfbbb * mfbbb
+                  real CUMccc = mfccc+((-c4o1 *  mfbbb * mfbbb
                      -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                     -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                     -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                     +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                        +two * (mfcaa * mfaca * mfaac)
-                        +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                     -c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                     -c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                     +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                        +c2o1 * (mfcaa * mfaca * mfaac)
+                        +c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                      -c1o3 * (mfacc+mfcac+mfcca)/rho
                      -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                     +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                     +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                         +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                      +c1o27*((drho * drho-drho)/(rho*rho)));
                   //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -551,9 +552,9 @@ void CompressibleCumulantLBMKernel::calculate(int step)
 
       //2.
       // linear combinations
-                  LBMReal mxxPyyPzz = mfcaa+mfaca+mfaac;
-                  LBMReal mxxMyy = mfcaa-mfaca;
-                  LBMReal mxxMzz = mfcaa-mfaac;
+                  real mxxPyyPzz = mfcaa+mfaca+mfaac;
+                  real mxxMyy = mfcaa-mfaca;
+                  real mxxMzz = mfcaa-mfaac;
 
                   //////////////////////////////////////////////////////////////////////////
          // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -593,14 +594,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                   //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
                   {
-                     LBMReal dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
-                     LBMReal dyuy = dxux+omega * c3o2 * mxxMyy;
-                     LBMReal dzuz = dxux+omega * c3o2 * mxxMzz;
+                     real dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
+                     real dyuy = dxux+omega * c3o2 * mxxMyy;
+                     real dzuz = dxux+omega * c3o2 * mxxMzz;
 
                      //relax
-                     mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-                     mxxMyy += omega * (-mxxMyy)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
-                     mxxMzz += omega * (-mxxMzz)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
+                     mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-c3o1 * (c1o1-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+                     mxxMyy += omega * (-mxxMyy)- c3o1 * (c1o1+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
+                     mxxMzz += omega * (-mxxMzz)- c3o1 * (c1o1+c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
 
                      //////////////////////////////////////////////////////////////////////////
                      //limiter-Scheise Teil 2
@@ -630,20 +631,20 @@ void CompressibleCumulantLBMKernel::calculate(int step)
 
                   // linear combinations back
                   mfcaa = c1o3 * (mxxMyy+mxxMzz+mxxPyyPzz);
-                  mfaca = c1o3 * (-two*  mxxMyy+mxxMzz+mxxPyyPzz);
-                  mfaac = c1o3 * (mxxMyy-two* mxxMzz+mxxPyyPzz);
+                  mfaca = c1o3 * (-c2o1*  mxxMyy+mxxMzz+mxxPyyPzz);
+                  mfaac = c1o3 * (mxxMyy-c2o1* mxxMzz+mxxPyyPzz);
 
                   //3.
                   // linear combinations
 
-                  LBMReal mxxyPyzz = mfcba+mfabc;
-                  LBMReal mxxyMyzz = mfcba-mfabc;
+                  real mxxyPyzz = mfcba+mfabc;
+                  real mxxyMyzz = mfcba-mfabc;
 
-                  LBMReal mxxzPyyz = mfcab+mfacb;
-                  LBMReal mxxzMyyz = mfcab-mfacb;
+                  real mxxzPyyz = mfcab+mfacb;
+                  real mxxzMyyz = mfcab-mfacb;
 
-                  LBMReal mxyyPxzz = mfbca+mfbac;
-                  LBMReal mxyyMxzz = mfbca-mfbac;
+                  real mxyyPxzz = mfbca+mfbac;
+                  real mxyyMxzz = mfbca-mfbac;
 
                   //relax
                   //////////////////////////////////////////////////////////////////////////
@@ -721,31 +722,31 @@ void CompressibleCumulantLBMKernel::calculate(int step)
 
                   //back cumulants to central moments
                   //4.
-                  mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;
-                  mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho;
-                  mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho;
+                  mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;
+                  mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho;
+                  mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho;
 
-                  mfcca = CUMcca+(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
-                  mfcac = CUMcac+(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
-                  mfacc = CUMacc+(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfcca = CUMcca+(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfcac = CUMcac+(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfacc = CUMacc+(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
 
                   //5.
-                  mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-                  mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-                  mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+                  mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+                  mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+                  mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                   //6.
 
-                  mfccc = CUMccc-((-four *  mfbbb * mfbbb
+                  mfccc = CUMccc-((-c4o1 *  mfbbb * mfbbb
                      -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                     -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                     -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                     +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                        +two * (mfcaa * mfaca * mfaac)
-                        +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                     -c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                     -c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                     +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                        +c2o1 * (mfcaa * mfaca * mfaac)
+                        +c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                      -c1o3 * (mfacc+mfcac+mfcca)/rho
                      -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                     +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                     +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                         +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                      +c1o27*((drho * drho-drho)/(rho*rho)));
                   ////////////////////////////////////////////////////////////////////////////////////
@@ -761,22 +762,22 @@ void CompressibleCumulantLBMKernel::calculate(int step)
             //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
             ////////////////////////////////////////////////////////////////////////////////////
             // Z - Dir
-                  m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+one* oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfaac-two* mfaab *  vvz+mfaaa                * (one-vz2)-one* oMdrho * vz2;
-                  m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+one* oMdrho) * (vz2+vvz) * c1o2;
+                  m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+c1o1* oMdrho) * (vz2-vvz) * c1o2;
+                  m1 = -mfaac-c2o1* mfaab *  vvz+mfaaa                * (c1o1-vz2)-c1o1* oMdrho * vz2;
+                  m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+c1o1* oMdrho) * (vz2+vvz) * c1o2;
                   mfaaa = m0;
                   mfaab = m1;
                   mfaac = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfabc * c1o2+mfabb * (vvz-c1o2)+mfaba * (vz2-vvz) * c1o2;
-                  m1 = -mfabc-two* mfabb *  vvz+mfaba * (one-vz2);
+                  m1 = -mfabc-c2o1* mfabb *  vvz+mfaba * (c1o1-vz2);
                   m2 = mfabc * c1o2+mfabb * (vvz+c1o2)+mfaba * (vz2+vvz) * c1o2;
                   mfaba = m0;
                   mfabb = m1;
                   mfabc = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfacc * c1o2+mfacb * (vvz-c1o2)+(mfaca+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfacc-two* mfacb *  vvz+mfaca                  * (one-vz2)-c1o3 * oMdrho * vz2;
+                  m1 = -mfacc-c2o1* mfacb *  vvz+mfaca                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                   m2 = mfacc * c1o2+mfacb * (vvz+c1o2)+(mfaca+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                   mfaca = m0;
                   mfacb = m1;
@@ -784,21 +785,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfbac * c1o2+mfbab * (vvz-c1o2)+mfbaa * (vz2-vvz) * c1o2;
-                  m1 = -mfbac-two* mfbab *  vvz+mfbaa * (one-vz2);
+                  m1 = -mfbac-c2o1* mfbab *  vvz+mfbaa * (c1o1-vz2);
                   m2 = mfbac * c1o2+mfbab * (vvz+c1o2)+mfbaa * (vz2+vvz) * c1o2;
                   mfbaa = m0;
                   mfbab = m1;
                   mfbac = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbbc * c1o2+mfbbb * (vvz-c1o2)+mfbba * (vz2-vvz) * c1o2;
-                  m1 = -mfbbc-two* mfbbb *  vvz+mfbba * (one-vz2);
+                  m1 = -mfbbc-c2o1* mfbbb *  vvz+mfbba * (c1o1-vz2);
                   m2 = mfbbc * c1o2+mfbbb * (vvz+c1o2)+mfbba * (vz2+vvz) * c1o2;
                   mfbba = m0;
                   mfbbb = m1;
                   mfbbc = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbcc * c1o2+mfbcb * (vvz-c1o2)+mfbca * (vz2-vvz) * c1o2;
-                  m1 = -mfbcc-two* mfbcb *  vvz+mfbca * (one-vz2);
+                  m1 = -mfbcc-c2o1* mfbcb *  vvz+mfbca * (c1o1-vz2);
                   m2 = mfbcc * c1o2+mfbcb * (vvz+c1o2)+mfbca * (vz2+vvz) * c1o2;
                   mfbca = m0;
                   mfbcb = m1;
@@ -806,21 +807,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcac * c1o2+mfcab * (vvz-c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfcac-two* mfcab *  vvz+mfcaa                  * (one-vz2)-c1o3 * oMdrho * vz2;
+                  m1 = -mfcac-c2o1* mfcab *  vvz+mfcaa                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                   m2 = mfcac * c1o2+mfcab * (vvz+c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                   mfcaa = m0;
                   mfcab = m1;
                   mfcac = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfcbc * c1o2+mfcbb * (vvz-c1o2)+mfcba * (vz2-vvz) * c1o2;
-                  m1 = -mfcbc-two* mfcbb *  vvz+mfcba * (one-vz2);
+                  m1 = -mfcbc-c2o1* mfcbb *  vvz+mfcba * (c1o1-vz2);
                   m2 = mfcbc * c1o2+mfcbb * (vvz+c1o2)+mfcba * (vz2+vvz) * c1o2;
                   mfcba = m0;
                   mfcbb = m1;
                   mfcbc = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfccc * c1o2+mfccb * (vvz-c1o2)+(mfcca+c1o9 * oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfccc-two* mfccb *  vvz+mfcca                  * (one-vz2)-c1o9 * oMdrho * vz2;
+                  m1 = -mfccc-c2o1* mfccb *  vvz+mfcca                  * (c1o1-vz2)-c1o9 * oMdrho * vz2;
                   m2 = mfccc * c1o2+mfccb * (vvz+c1o2)+(mfcca+c1o9 * oMdrho) * (vz2+vvz) * c1o2;
                   mfcca = m0;
                   mfccb = m1;
@@ -831,21 +832,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   // Y - Dir
                   m0 = mfaca * c1o2+mfaba * (vvy-c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfaca-two* mfaba *  vvy+mfaaa                  * (one-vy2)-c1o6 * oMdrho * vy2;
+                  m1 = -mfaca-c2o1* mfaba *  vvy+mfaaa                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                   m2 = mfaca * c1o2+mfaba * (vvy+c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                   mfaaa = m0;
                   mfaba = m1;
                   mfaca = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfacb * c1o2+mfabb * (vvy-c1o2)+(mfaab+c2o3 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfacb-two* mfabb *  vvy+mfaab                  * (one-vy2)-c2o3 * oMdrho * vy2;
+                  m1 = -mfacb-c2o1* mfabb *  vvy+mfaab                  * (c1o1-vy2)-c2o3 * oMdrho * vy2;
                   m2 = mfacb * c1o2+mfabb * (vvy+c1o2)+(mfaab+c2o3 * oMdrho) * (vy2+vvy) * c1o2;
                   mfaab = m0;
                   mfabb = m1;
                   mfacb = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfacc * c1o2+mfabc * (vvy-c1o2)+(mfaac+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfacc-two* mfabc *  vvy+mfaac                  * (one-vy2)-c1o6 * oMdrho * vy2;
+                  m1 = -mfacc-c2o1* mfabc *  vvy+mfaac                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                   m2 = mfacc * c1o2+mfabc * (vvy+c1o2)+(mfaac+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                   mfaac = m0;
                   mfabc = m1;
@@ -853,21 +854,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfbca * c1o2+mfbba * (vvy-c1o2)+mfbaa * (vy2-vvy) * c1o2;
-                  m1 = -mfbca-two* mfbba *  vvy+mfbaa * (one-vy2);
+                  m1 = -mfbca-c2o1* mfbba *  vvy+mfbaa * ( c1o1-vy2);
                   m2 = mfbca * c1o2+mfbba * (vvy+c1o2)+mfbaa * (vy2+vvy) * c1o2;
                   mfbaa = m0;
                   mfbba = m1;
                   mfbca = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbcb * c1o2+mfbbb * (vvy-c1o2)+mfbab * (vy2-vvy) * c1o2;
-                  m1 = -mfbcb-two* mfbbb *  vvy+mfbab * (one-vy2);
+                  m1 = -mfbcb-c2o1* mfbbb *  vvy+mfbab * (c1o1-vy2);
                   m2 = mfbcb * c1o2+mfbbb * (vvy+c1o2)+mfbab * (vy2+vvy) * c1o2;
                   mfbab = m0;
                   mfbbb = m1;
                   mfbcb = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbcc * c1o2+mfbbc * (vvy-c1o2)+mfbac * (vy2-vvy) * c1o2;
-                  m1 = -mfbcc-two* mfbbc *  vvy+mfbac * (one-vy2);
+                  m1 = -mfbcc-c2o1* mfbbc *  vvy+mfbac * (c1o1-vy2);
                   m2 = mfbcc * c1o2+mfbbc * (vvy+c1o2)+mfbac * (vy2+vvy) * c1o2;
                   mfbac = m0;
                   mfbbc = m1;
@@ -875,21 +876,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcca * c1o2+mfcba * (vvy-c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfcca-two* mfcba *  vvy+mfcaa                   * (one-vy2)-c1o18 * oMdrho * vy2;
+                  m1 = -mfcca-c2o1* mfcba *  vvy+mfcaa                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                   m2 = mfcca * c1o2+mfcba * (vvy+c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                   mfcaa = m0;
                   mfcba = m1;
                   mfcca = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfccb * c1o2+mfcbb * (vvy-c1o2)+(mfcab+c2o9 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfccb-two* mfcbb *  vvy+mfcab                  * (one-vy2)-c2o9 * oMdrho * vy2;
+                  m1 = -mfccb-c2o1* mfcbb *  vvy+mfcab                  * (c1o1-vy2)-c2o9 * oMdrho * vy2;
                   m2 = mfccb * c1o2+mfcbb * (vvy+c1o2)+(mfcab+c2o9 * oMdrho) * (vy2+vvy) * c1o2;
                   mfcab = m0;
                   mfcbb = m1;
                   mfccb = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfccc * c1o2+mfcbc * (vvy-c1o2)+(mfcac+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfccc-two* mfcbc *  vvy+mfcac                   * (one-vy2)-c1o18 * oMdrho * vy2;
+                  m1 = -mfccc-c2o1* mfcbc *  vvy+mfcac                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                   m2 = mfccc * c1o2+mfcbc * (vvy+c1o2)+(mfcac+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                   mfcac = m0;
                   mfcbc = m1;
@@ -900,21 +901,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   // X - Dir
                   m0 = mfcaa * c1o2+mfbaa * (vvx-c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcaa-two* mfbaa *  vvx+mfaaa                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfcaa-c2o1* mfbaa *  vvx+mfaaa                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfcaa * c1o2+mfbaa * (vvx+c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaaa = m0;
                   mfbaa = m1;
                   mfcaa = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcba * c1o2+mfbba * (vvx-c1o2)+(mfaba+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcba-two* mfbba *  vvx+mfaba                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfcba-c2o1* mfbba *  vvx+mfaba                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfcba * c1o2+mfbba * (vvx+c1o2)+(mfaba+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaba = m0;
                   mfbba = m1;
                   mfcba = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcca * c1o2+mfbca * (vvx-c1o2)+(mfaca+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcca-two* mfbca *  vvx+mfaca                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfcca-c2o1* mfbca *  vvx+mfaca                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfcca * c1o2+mfbca * (vvx+c1o2)+(mfaca+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaca = m0;
                   mfbca = m1;
@@ -922,21 +923,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcab * c1o2+mfbab * (vvx-c1o2)+(mfaab+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcab-two* mfbab *  vvx+mfaab                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfcab-c2o1* mfbab *  vvx+mfaab                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfcab * c1o2+mfbab * (vvx+c1o2)+(mfaab+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaab = m0;
                   mfbab = m1;
                   mfcab = m2;
                   ///////////b////////////////////////////////////////////////////////////////////////
                   m0 = mfcbb * c1o2+mfbbb * (vvx-c1o2)+(mfabb+c4o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcbb-two* mfbbb *  vvx+mfabb                  * (one-vx2)-c4o9 * oMdrho * vx2;
+                  m1 = -mfcbb-c2o1* mfbbb *  vvx+mfabb                  * (c1o1-vx2)-c4o9 * oMdrho * vx2;
                   m2 = mfcbb * c1o2+mfbbb * (vvx+c1o2)+(mfabb+c4o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfabb = m0;
                   mfbbb = m1;
                   mfcbb = m2;
                   ///////////b////////////////////////////////////////////////////////////////////////
                   m0 = mfccb * c1o2+mfbcb * (vvx-c1o2)+(mfacb+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfccb-two* mfbcb *  vvx+mfacb                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfccb-c2o1* mfbcb *  vvx+mfacb                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfccb * c1o2+mfbcb * (vvx+c1o2)+(mfacb+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfacb = m0;
                   mfbcb = m1;
@@ -944,21 +945,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcac * c1o2+mfbac * (vvx-c1o2)+(mfaac+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcac-two* mfbac *  vvx+mfaac                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfcac-c2o1* mfbac *  vvx+mfaac                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfcac * c1o2+mfbac * (vvx+c1o2)+(mfaac+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaac = m0;
                   mfbac = m1;
                   mfcac = m2;
                   ///////////c////////////////////////////////////////////////////////////////////////
                   m0 = mfcbc * c1o2+mfbbc * (vvx-c1o2)+(mfabc+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcbc-two* mfbbc *  vvx+mfabc                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfcbc-c2o1* mfbbc *  vvx+mfabc                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfcbc * c1o2+mfbbc * (vvx+c1o2)+(mfabc+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfabc = m0;
                   mfbbc = m1;
                   mfcbc = m2;
                   ///////////c////////////////////////////////////////////////////////////////////////
                   m0 = mfccc * c1o2+mfbcc * (vvx-c1o2)+(mfacc+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfccc-two* mfbcc *  vvx+mfacc                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfccc-c2o1* mfbcc *  vvx+mfacc                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfccc * c1o2+mfbcc * (vvx+c1o2)+(mfacc+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfacc = m0;
                   mfbcc = m1;
@@ -969,11 +970,11 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   //proof correctness
                   //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-                  LBMReal drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+                  real drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                      +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                      +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
                   //LBMReal dif = fabs(rho - rho_post);
-                  LBMReal dif = drho - drho_post;
+                  real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                   if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1030,7 +1031,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double CompressibleCumulantLBMKernel::getCalculationTime()
+real CompressibleCumulantLBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h
index 802a707708583b9f76700f5f12038f70b29db7b4..7af69201cde7bd8812173f1b6bd9d397e731686b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h
@@ -21,32 +21,32 @@ public:
    ~CompressibleCumulantLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    void setBulkOmegaToOmega(bool value);
    void setRelaxationParameter(Parameter p);
 protected:
    virtual void initDataSet();
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   LBMReal OxyyMxzz;
+   real OxyyMxzz;
    Parameter parameter;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
    
    // bulk viscosity
    bool bulkOmegaToOmega;
-   LBMReal OxxPyyPzz; 
+   real OxxPyyPzz; 
 };
 #endif // CompressibleCumulantLBMKernel_h__
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp
index a359db9cd61ef3042130f5148abdc4cf1488617a..358f8269a65b247efb7abd9c6a2a840c6e122e08 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp
@@ -1,10 +1,11 @@
 #include "CompressibleOffsetInterpolationProcessor.h"
 #include "D3Q27System.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
-CompressibleOffsetInterpolationProcessor::CompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+CompressibleOffsetInterpolationProcessor::CompressibleOffsetInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
 
@@ -19,13 +20,13 @@ InterpolationProcessorPtr CompressibleOffsetInterpolationProcessor::clone()
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void CompressibleOffsetInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -35,7 +36,7 @@ void CompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -49,49 +50,51 @@ void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICel
    calcInterpolatedNodeCF(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void CompressibleOffsetInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
+   using namespace vf::lbm::constant;
 
-   LBMReal drho = 0.0;
+   real drho = 0.0;
    D3Q27System::calcCompMacroscopicValues(f,drho,vx1,vx2,vx3);
    
    press = drho; //interpolate rho!
 
-   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(one + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
-   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(one + drho)-(vx2*vx3));
-   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(one + drho)-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(one + drho)-(vx1*vx1-vx2*vx2));
-   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(one + drho)-(vx1*vx1-vx3*vx3));
+   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(c1o1 + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
+   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(c1o1 + drho)-(vx2*vx3));
+   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(c1o1 + drho)-(vx1*vx3));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(c1o1 + drho)-(vx1*vx1-vx2*vx2));
+   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(c1o1 + drho)-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -346,7 +349,7 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -454,21 +457,22 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f, LBMReal  /*omega*/, LBMReal  /*x*/, LBMReal  /*y*/, LBMReal  /*z*/, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeCF(real* f, real  /*omega*/, real  /*x*/, real  /*y*/, real  /*z*/, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+   real rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingF*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcCompFeq(feq,rho,vx1,vx2,vx3);
 
    f[DIR_P00]    = f_E    + xs*x_E    + ys*y_E    + zs*z_E    + xs*ys*xy_E    + xs*zs*xz_E    + ys*zs*yz_E    + feq[DIR_P00];
@@ -501,7 +505,7 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSW()
+real CompressibleOffsetInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -514,7 +518,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSW()
+real CompressibleOffsetInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -527,7 +531,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSE()
+real CompressibleOffsetInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -540,7 +544,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSE()
+real CompressibleOffsetInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -553,7 +557,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNW()
+real CompressibleOffsetInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -566,7 +570,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNW()
+real CompressibleOffsetInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -579,7 +583,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNE()
+real CompressibleOffsetInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -592,7 +596,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNE()
+real CompressibleOffsetInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -605,11 +609,12 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -617,22 +622,22 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
-   LBMReal rho = press ;//+ (ax+by+cz)/3.;
+   real rho = press ;//+ (ax+by+cz)/3.;
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingC*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcCompFeq(feq,rho,vx1,vx2,vx3);
 
-   LBMReal eps_new = 2.;
-   LBMReal o  = omega;
+   real eps_new = 2.;
+   real o  = omega;
 //   LBMReal op = 1.;
 
    //f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
@@ -694,14 +699,14 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f
    f[DIR_000] = f_ZERO + feq[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h
index b81277683d0feaf97ed2f9c45cc108a99b9d8a3c..fefd6ed1566fe0a4c3c414748a522edbfede48a1 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h
@@ -15,52 +15,52 @@ class CompressibleOffsetInterpolationProcessor : public InterpolationProcessor
 {
 public:
    CompressibleOffsetInterpolationProcessor() = default;
-   CompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   CompressibleOffsetInterpolationProcessor(real omegaC, real omegaF);
    ~CompressibleOffsetInterpolationProcessor() override = default;
 
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
    //LBMReal forcingC, forcingF;
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
 //   LBMReal a,b,c;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -69,7 +69,7 @@ inline void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp
index e6883626c94e8de2b01f0c331f7580a7a7b9b9d2..7321e7f7bfca2080b9c52f89c5c9354b219c1a94 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp
@@ -1,24 +1,25 @@
 #include "CompressibleOffsetMomentsInterpolationProcessor.h"
 #include "D3Q27System.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 CompressibleOffsetMomentsInterpolationProcessor::CompressibleOffsetMomentsInterpolationProcessor()
     
 {
    this->bulkViscosity = 0.0;
    this->shearViscosity = 0.0;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
-CompressibleOffsetMomentsInterpolationProcessor::CompressibleOffsetMomentsInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+CompressibleOffsetMomentsInterpolationProcessor::CompressibleOffsetMomentsInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
    this->bulkViscosity = 0.0;
    this->shearViscosity = 0.0;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
 CompressibleOffsetMomentsInterpolationProcessor::~CompressibleOffsetMomentsInterpolationProcessor()
@@ -34,13 +35,13 @@ InterpolationProcessorPtr CompressibleOffsetMomentsInterpolationProcessor::clone
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void CompressibleOffsetMomentsInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 
-   LBMReal dtC = (3.0*shearViscosity)/((1/omegaC)-0.5);
-   LBMReal dtF = (3.0*shearViscosity)/((1/omegaF)-0.5);
+   real dtC = (3.0*shearViscosity)/((1/omegaC)-0.5);
+   real dtF = (3.0*shearViscosity)/((1/omegaF)-0.5);
 
    if (bulkViscosity != 0)
    {
@@ -49,12 +50,12 @@ void CompressibleOffsetMomentsInterpolationProcessor::setOmegas( LBMReal omegaC,
    }
    else
    {
-      this->OxxPyyPzzC = one;
-      this->OxxPyyPzzF = one;
+      this->OxxPyyPzzC = c1o1;
+      this->OxxPyyPzzF = c1o1;
    }
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetMomentsInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -64,7 +65,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::setOffsets(LBMReal xoff, L
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -78,49 +79,50 @@ void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseToFine(D3
    calcInterpolatedNodeCF(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void CompressibleOffsetMomentsInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal drho = 0.0;
+   real drho = 0.0;
    D3Q27System::calcCompMacroscopicValues(f,drho,vx1,vx2,vx3);
    
    press = drho; //interpolate rho!
 
-   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(one + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
-   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(one + drho)-(vx2*vx3));
-   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(one + drho)-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(one + drho)-(vx1*vx1-vx2*vx2));
-   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(one + drho)-(vx1*vx1-vx3*vx3));
+   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(c1o1 + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
+   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(c1o1 + drho)-(vx2*vx3));
+   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(c1o1 + drho)-(vx1*vx3));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(c1o1 + drho)-(vx1*vx1-vx2*vx2));
+   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(c1o1 + drho)-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -375,7 +377,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiet
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -483,79 +485,81 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiet
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
+   using namespace vf::lbm::constant;
 
-   LBMReal eps_new = 0.5;
-   LBMReal o = omega;
+   real eps_new = 0.5;
+   real o = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzF;
+   real oP = OxxPyyPzzF;
 
 //   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = press; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
 
    //2.f
 
    // linear combinations
-   LBMReal mxxPyyPzz = mfaaa - c2o3*(ax + by + two*axx*x + bxy*x + axy*y + two*byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + two*czz*z)*eps_new / oP* (one + press);
-   LBMReal mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + two*axx*x - bxy*x + axy*y - two*byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + two*axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - two*czz*z + axyz*y*z)*eps_new/o * (one + press);
+   real mxxPyyPzz = mfaaa - c2o3*(ax + by + c2o1 *axx*x + bxy*x + axy*y + c2o1 *byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + c2o1 *czz*z)*eps_new / oP* (c1o1 + press);
+   real mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + c2o1 *axx*x - bxy*x + axy*y - c2o1 *byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + c2o1 *axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - c2o1 *czz*z + axyz*y*z)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + two*cyy*y + bxyz*x*y + two*bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + two*cxx*x + ayz*y + cxy*y + axyz*x*y + two*azz*z + cxz*z + cxyz*y*z)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + two*bxx*x + two*ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + c2o1 *cyy*y + bxyz*x*y + c2o1 *bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + c2o1 *cxx*x + ayz*y + cxy*y + axyz*x*y + c2o1 *azz*z + cxz*z + cxyz*y*z)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + c2o1 *bxx*x + c2o1 *ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (c1o1 + press);
 
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz) ;
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz) ;
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz) ;
 
    //three
-   mfbbb = zeroReal;
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz =  zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   mfbbb = c0o1;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz = c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -581,22 +585,22 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -604,21 +608,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -626,21 +630,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -651,21 +655,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -673,21 +677,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -695,21 +699,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -720,21 +724,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -742,21 +746,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -764,21 +768,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -815,7 +819,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -828,7 +832,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -841,7 +845,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -854,7 +858,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -867,7 +871,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -880,7 +884,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -893,7 +897,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -906,7 +910,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -919,11 +923,12 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -931,81 +936,81 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
 //   LBMReal rho = press ;//+ (ax+by+cz)/3.;
 
-   LBMReal eps_new = 2.;
-   LBMReal o  = omega;
+   real eps_new = 2.;
+   real o  = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzC;
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real oP = OxxPyyPzzC;
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = press; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
    //oMdrho = one - mfaaa;
 
    //2.f
    // linear combinations
 
 /////////////////////////
-   LBMReal mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(one+press);
+   real mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(c1o1 +press);
 
-   LBMReal mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (one + press);
+   real mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (c1o1 + press);
 
    ////////////////////////
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz);
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz);
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
    //three
-   mfbbb = zeroReal;
+   mfbbb = c0o1;
 
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz = zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz = c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -1029,22 +1034,22 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -1052,21 +1057,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -1074,21 +1079,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -1099,21 +1104,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -1121,21 +1126,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -1143,21 +1148,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -1168,21 +1173,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -1190,21 +1195,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -1212,21 +1217,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -1262,14 +1267,14 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    f[DIR_MMM] = mfaaa;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
@@ -1279,7 +1284,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedShearStres
 	tauyz=0.5*((bz+2.0*bzz*z+bxz*x+byz*y+bxyz*x*y)+(cy+2.0*cyy*y+cxy*x+cyz*z+cxyz*x*z));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::setBulkViscosity(LBMReal shearViscosity, LBMReal bulkViscosity)
+void CompressibleOffsetMomentsInterpolationProcessor::setBulkViscosity(real shearViscosity, real bulkViscosity)
 {
    this->shearViscosity = shearViscosity;
    this->bulkViscosity  = bulkViscosity;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h
index bee108e64a9294a3286e3b79519d496bd5ac91cf..32ab8cedf89e2e644f2f939f49ed4b0101eb0e32 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h
@@ -15,57 +15,57 @@ class CompressibleOffsetMomentsInterpolationProcessor : public InterpolationProc
 {
 public:
    CompressibleOffsetMomentsInterpolationProcessor();
-   CompressibleOffsetMomentsInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   CompressibleOffsetMomentsInterpolationProcessor(real omegaC, real omegaF);
    ~CompressibleOffsetMomentsInterpolationProcessor() override;
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
-   void setBulkViscosity(LBMReal shearViscosity, LBMReal bulkViscosity);
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
+   void setBulkViscosity(real shearViscosity, real bulkViscosity);
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
-//   LBMReal a,b,c;
+//   real a,b,c;
 
    // bulk viscosity
-   LBMReal shearViscosity;
-   LBMReal bulkViscosity;
-   LBMReal OxxPyyPzzC;
-   LBMReal OxxPyyPzzF;
+   real shearViscosity;
+   real bulkViscosity;
+   real OxxPyyPzzC;
+   real OxxPyyPzzF;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -74,7 +74,7 @@ inline void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseTo
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp
index c9cc8138dcf1d4ce11ee4e2aa7b733f2174f367d..f924d4b9150eabc78b9aa8b5e72eb3dd5df3cf45 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp
@@ -1,22 +1,23 @@
 #include "CompressibleOffsetSquarePressureInterpolationProcessor.h"
 #include "D3Q27System.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 CompressibleOffsetSquarePressureInterpolationProcessor::CompressibleOffsetSquarePressureInterpolationProcessor()
     
 {
    this->bulkOmegaToOmega = false;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
-CompressibleOffsetSquarePressureInterpolationProcessor::CompressibleOffsetSquarePressureInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+CompressibleOffsetSquarePressureInterpolationProcessor::CompressibleOffsetSquarePressureInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
    this->bulkOmegaToOmega = false;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
 CompressibleOffsetSquarePressureInterpolationProcessor::~CompressibleOffsetSquarePressureInterpolationProcessor()
@@ -32,19 +33,19 @@ InterpolationProcessorPtr CompressibleOffsetSquarePressureInterpolationProcessor
    }
    else
    {
-      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzC = one;
-      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzF = one;
+      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzC = c1o1;
+      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzF = c1o1;
    }
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void CompressibleOffsetSquarePressureInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetSquarePressureInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -54,7 +55,7 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::setOffsets(LBMReal
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -68,49 +69,50 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateCoarseTo
    calcInterpolatedNodeCF(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal drho = 0.0;
+   real drho = 0.0;
    D3Q27System::calcCompMacroscopicValues(f,drho,vx1,vx2,vx3);
    
    press = drho; //interpolate rho!
 
-   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(one + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
-   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(one + drho)-(vx2*vx3));
-   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(one + drho)-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(one + drho)-(vx1*vx1-vx2*vx2));
-   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(one + drho)-(vx1*vx1-vx3*vx3));
+   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(c1o1 + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
+   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(c1o1 + drho)-(vx2*vx3));
+   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(c1o1 + drho)-(vx1*vx3));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(c1o1 + drho)-(vx1*vx1-vx2*vx2));
+   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(c1o1 + drho)-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -365,7 +367,7 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoe
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -473,84 +475,85 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoe
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal eps_new = 0.5;
-   LBMReal o = omega;
+   real eps_new = 0.5;
+   real o = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzF;
+   real oP = OxxPyyPzzF;
 
-   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
+   real rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
 
-   LBMReal laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
+   real laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
 
    rho=rho+laplaceRho*(3.0/16.0);
 
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = rho; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
 
    //2.f
 
    // linear combinations
-   LBMReal mxxPyyPzz = mfaaa - c2o3*(ax + by + two*axx*x + bxy*x + axy*y + two*byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + two*czz*z)*eps_new / oP* (one + press);
-   LBMReal mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + two*axx*x - bxy*x + axy*y - two*byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + two*axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - two*czz*z + axyz*y*z)*eps_new/o * (one + press);
+   real mxxPyyPzz = mfaaa - c2o3*(ax + by + c2o1*axx*x + bxy*x + axy*y + c2o1*byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + c2o1*czz*z)*eps_new / oP* (c1o1 + press);
+   real mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + c2o1*axx*x - bxy*x + axy*y - c2o1*byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + c2o1*axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - c2o1*czz*z + axyz*y*z)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + two*cyy*y + bxyz*x*y + two*bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + two*cxx*x + ayz*y + cxy*y + axyz*x*y + two*azz*z + cxz*z + cxyz*y*z)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + two*bxx*x + two*ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + c2o1*cyy*y + bxyz*x*y + c2o1*bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + c2o1*cxx*x + ayz*y + cxy*y + axyz*x*y + c2o1*azz*z + cxz*z + cxyz*y*z)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + c2o1*bxx*x + c2o1*ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (c1o1 + press);
 
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz) ;
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz) ;
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz) ;
 
    //three
-   mfbbb = zeroReal;
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz =  zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   mfbbb = c0o1;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz =  c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -576,22 +579,22 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -599,21 +602,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -621,21 +624,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -646,21 +649,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -668,21 +671,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -690,21 +693,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -715,21 +718,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -737,21 +740,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -759,21 +762,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -810,7 +813,7 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -823,7 +826,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -836,7 +839,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -849,7 +852,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -862,7 +865,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -875,7 +878,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -888,7 +891,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -901,7 +904,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -914,11 +917,12 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -926,86 +930,86 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
   
    
-   LBMReal rho = press ;//+ (ax+by+cz)/3.;
+   real rho = press ;//+ (ax+by+cz)/3.;
 
-   LBMReal laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
+   real laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
 
    rho=rho-laplaceRho*0.25;
 
-   LBMReal eps_new = 2.0;
-   LBMReal o  = omega;
+   real eps_new = 2.0;
+   real o  = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzC;
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real oP = OxxPyyPzzC;
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = rho; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
-   //oMdrho = one - mfaaa;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
+   //oMdrho = c1o1 - mfaaa;
 
    //2.f
    // linear combinations
 
 /////////////////////////
-   LBMReal mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(one+press);
+   real mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(c1o1+press);
 
-   LBMReal mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (one + press);
+   real mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (c1o1 + press);
 
    ////////////////////////
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz);
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz);
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
    //three
-   mfbbb = zeroReal;
+   mfbbb = c0o1;
 
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz =  zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz =  c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -1029,22 +1033,22 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -1052,21 +1056,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -1074,21 +1078,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -1099,21 +1103,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -1121,21 +1125,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -1143,21 +1147,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -1168,21 +1172,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -1190,21 +1194,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -1212,21 +1216,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -1262,14 +1266,14 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    f[DIR_MMM]  = mfaaa;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3) // Evaluates three polynomials in (x, y, z) - constant, linear, squared, pairwise-mixed and x*y*z terms, using the member coefficients a*, b* and c* - and writes the results to the output references vx1, vx2 and vx3.
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h
index e456668afc3d8b3a5c993774d60df5c9edff28a7..d9285289f63e7dfca694342df092b6e5f4ecb4d5 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h
@@ -15,56 +15,56 @@ class CompressibleOffsetSquarePressureInterpolationProcessor : public Interpolat
 {
 public:
    CompressibleOffsetSquarePressureInterpolationProcessor();
-   CompressibleOffsetSquarePressureInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   CompressibleOffsetSquarePressureInterpolationProcessor(real omegaC, real omegaF);
    ~CompressibleOffsetSquarePressureInterpolationProcessor() override;
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
    void setBulkOmegaToOmega(bool value);
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
-//   LBMReal a,b,c;
+//   real a,b,c;
 
    // bulk viscosity
    bool bulkOmegaToOmega;
-   LBMReal OxxPyyPzzC;
-   LBMReal OxxPyyPzzF;
+   real OxxPyyPzzC;
+   real OxxPyyPzzF;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -73,7 +73,7 @@ inline void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateC
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
index 2a895950a79cf011c25b0d352689216f53e96d41..e9c7007a90063c0f65a6c69053441ae8581866a6 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
@@ -41,7 +41,9 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant
+;
 
 //////////////////////////////////////////////////////////////////////////
 CumulantK17LBMKernel::CumulantK17LBMKernel()
@@ -127,7 +129,7 @@ void CumulantK17LBMKernel::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-    LBMReal omega = collFactor;
+    real omega = collFactor;
 
     for (int x3 = minX3; x3 < maxX3; x3++)
     {
@@ -164,54 +166,54 @@ void CumulantK17LBMKernel::calculate(int step)
                     // a b c
                     //-1 0 1
 
-                    LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-                    LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-                    LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-                    LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-                    LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-                    LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-                    LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-                    LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-                    LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-                    LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-                    LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-                    LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-                    LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-                    LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-                    LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-                    LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-                    LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-                    LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-                    LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-                    LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-                    LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-                    LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-                    LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                    LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                    LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                    LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                    LBMReal mfbbb = (*this->restDistributions)(x1, x2, x3);
+                    real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                    real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                    real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                    real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                    real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                    real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                    real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                    real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                    real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                    real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                    real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                    real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                    real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                    real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                    real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                    real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                    real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                    real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                    real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                    real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                    real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                    real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                    real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                    real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                    real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                    real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                    real mfbbb = (*this->restDistributions)(x1, x2, x3);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3)
                     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
                     //!
-                    LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                    real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                                     (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
                                     ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 
-                    LBMReal rho = c1 + drho;
-                    LBMReal OOrho = c1 / rho;
+                    real rho = c1o1 + drho;
+                    real OOrho = c1o1 / rho;
                     ////////////////////////////////////////////////////////////////////////////////////
-                    LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                    real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
                                    (mfcbb - mfabb)) / rho;
-                    LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                    real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
                                    (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
                                    (mfbcb - mfbab)) / rho;
-                    LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                    real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
                                    (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
                                    (mfbbc - mfbba)) / rho;
                     ////////////////////////////////////////////////////////////////////////////////////
@@ -219,9 +221,9 @@ void CumulantK17LBMKernel::calculate(int step)
                     ///////////////////////////////////////////////////////////////////////////////////////////
                     if (withForcing)
                     {
-                        muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-                        muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-                        muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+                        muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+                        muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+                        muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
                         forcingX1 = muForcingX1.Eval();
                         forcingX2 = muForcingX2.Eval();
@@ -237,17 +239,17 @@ void CumulantK17LBMKernel::calculate(int step)
                     }
                     ////////////////////////////////////////////////////////////////////////////////////
                     // calculate the square of velocities for this lattice node
-                    LBMReal vx2 = vvx * vvx;
-                    LBMReal vy2 = vvy * vvy;
-                    LBMReal vz2 = vvz * vvz;
+                    real vx2 = vvx * vvx;
+                    real vy2 = vvy * vvy;
+                    real vz2 = vvz * vvz;
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    LBMReal wadjust;
-                    LBMReal qudricLimitP = c1o100;
-                    LBMReal qudricLimitM = c1o100;
-                    LBMReal qudricLimitD = c1o100;
+                    real wadjust;
+                    real qudricLimitP = c1o100;
+                    real qudricLimitM = c1o100;
+                    real qudricLimitD = c1o100;
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in
                     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -256,39 +258,39 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to
@@ -304,29 +306,29 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     //2.
-                    LBMReal OxxPyyPzz = c1;
+                    real OxxPyyPzz = c1o1;
                     ////////////////////////////////////////////////////////////
                     //3.
-                    LBMReal OxyyPxzz = c8  * (-c2 + omega) * ( c1 + c2*omega) / (-c8 - c14*omega + c7*omega*omega);
-                    LBMReal OxyyMxzz = c8  * (-c2 + omega) * (-c7 + c4*omega) / (c56 - c50*omega + c9*omega*omega);
-                    LBMReal Oxyz     = c24 * (-c2 + omega) * (-c2 - c7*omega + c3*omega*omega) / (c48 + c152*omega - c130*omega*omega + c29*omega*omega*omega);
+                    real OxyyPxzz = c8o1 * (-c2o1 + omega) * ( c1o1 + c2o1 *omega) / (-c8o1 - c14o1 *omega + c7o1 *omega*omega);
+                    real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 *omega) / (c56o1 - c50o1 *omega + c9o1 *omega*omega);
+                    real Oxyz     = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 *omega + c3o1 *omega*omega) / (c48o1 + c152o1 *omega - c130o1 *omega*omega + c29o1 *omega*omega*omega);
                     ////////////////////////////////////////////////////////////
                     //4.
-                    LBMReal O4 = c1;
+                    real O4 = c1o1;
                     ////////////////////////////////////////////////////////////
                     //5.
-                    LBMReal O5 = c1;
+                    real O5 = c1o1;
                     ////////////////////////////////////////////////////////////
                     //6.
-                    LBMReal O6 = c1;
+                    real O6 = c1o1;
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //! with simplifications assuming \f$\omega_2 = 1.0\f$ (modify for different bulk viscosity).
                     //!
-                    LBMReal A = (c4 + c2*omega - c3*omega*omega) / (c2 - c7*omega + c5*omega*omega);
-                    LBMReal B = (c4 + c28*omega - c14*omega*omega) / (c6 - c21*omega + c15*omega*omega);
+                    real A = (c4o1 + c2o1 *omega - c3o1 *omega*omega) / (c2o1 - c7o1 *omega + c5o1 *omega*omega);
+                    real B = (c4o1 + c28o1 *omega - c14o1 *omega*omega) / (c6o1 - c21o1 *omega + c15o1 *omega*omega);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Compute cumulants from central moments according to Eq. (20)-(23) in
@@ -334,30 +336,30 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     //4.
-                    LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2 * mfbba * mfbab) * OOrho;
-                    LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2 * mfbba * mfabb) * OOrho;
-                    LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2 * mfbab * mfabb) * OOrho;
+                    real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
+                    real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
+                    real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
 
-                    LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho));
-                    LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho));
-                    LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho));
+                    real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho));
+                    real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho));
+                    real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho));
                     ////////////////////////////////////////////////////////////
                     //5.
-                    LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho;
-                    LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho;
-                    LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho;
+                    real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho;
+                    real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho;
+                    real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho;
                     ////////////////////////////////////////////////////////////
                     //6.
-                    LBMReal CUMccc = mfccc + ((-c4 * mfbbb * mfbbb
+                    real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb
                                                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-                                               - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-                                               - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
-                                              + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-                                                 + c2 * (mfcaa * mfaca * mfaac)
-                                                 + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho
+                                               - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                                               - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+                                              + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                                                 + c2o1 * (mfcaa * mfaca * mfaac)
+                                                 + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho
                                               - c1o3 * (mfacc + mfcac + mfcca) * OOrho
                                               - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
-                                              + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                                              + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
                                                  + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3
                                               + c1o27 * ((drho * drho - drho) * OOrho * OOrho));
 
@@ -366,19 +368,19 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     //2.
-                    LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-                    LBMReal mxxMyy = mfcaa - mfaca;
-                    LBMReal mxxMzz = mfcaa - mfaac;
+                    real mxxPyyPzz = mfcaa + mfaca + mfaac;
+                    real mxxMyy = mfcaa - mfaca;
+                    real mxxMzz = mfcaa - mfaac;
                     ////////////////////////////////////////////////////////////
                     //3.
-                    LBMReal mxxyPyzz = mfcba + mfabc;
-                    LBMReal mxxyMyzz = mfcba - mfabc;
+                    real mxxyPyzz = mfcba + mfabc;
+                    real mxxyMyzz = mfcba - mfabc;
 
-                    LBMReal mxxzPyyz = mfcab + mfacb;
-                    LBMReal mxxzMyyz = mfcab - mfacb;
+                    real mxxzPyyz = mfcab + mfacb;
+                    real mxxzMyyz = mfcab - mfacb;
 
-                    LBMReal mxyyPxzz = mfbca + mfbac;
-                    LBMReal mxyyMxzz = mfbca - mfbac;
+                    real mxyyPxzz = mfbca + mfbac;
+                    real mxyyMxzz = mfbca - mfbac;
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //incl. correction
@@ -389,19 +391,19 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
                     //! Note that the division by rho is omitted here as we need rho times the gradients later.
                     //!
-                    LBMReal Dxy = -c3 * omega * mfbba;
-                    LBMReal Dxz = -c3 * omega * mfbab;
-                    LBMReal Dyz = -c3 * omega * mfabb;
-                    LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-                    LBMReal dyuy = dxux + omega * c3o2 * mxxMyy;
-                    LBMReal dzuz = dxux + omega * c3o2 * mxxMzz;
+                    real Dxy = -c3o1 * omega * mfbba;
+                    real Dxz = -c3o1 * omega * mfbab;
+                    real Dyz = -c3o1 * omega * mfabb;
+                    real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                    real dyuy = dxux + omega * c3o2 * mxxMyy;
+                    real dzuz = dxux + omega * c3o2 * mxxMzz;
                     ////////////////////////////////////////////////////////////
                     //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3 * (c1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
-                    mxxMyy += omega * (-mxxMyy) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-                    mxxMzz += omega * (-mxxMzz) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+                    mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+                    mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+                    mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
                     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                     ////no correction
@@ -420,19 +422,19 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    wadjust = Oxyz + (c1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
+                    wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
                     mfbbb += wadjust * (-mfbbb);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
                     mxxyPyzz += wadjust * (-mxxyPyzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
                     mxxyMyzz += wadjust * (-mxxyMyzz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
                     mxxzPyyz += wadjust * (-mxxzPyyz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
                     mxxzMyyz += wadjust * (-mxxzMyyz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
                     mxyyPxzz += wadjust * (-mxyyPxzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
                     mxyyMxzz += wadjust * (-mxyyMxzz);
                     //////////////////////////////////////////////////////////////////////////
                     // no limiter
@@ -448,8 +450,8 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! - Compute inverse linear combinations of second and third order cumulants
                     //!
                     mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaca = c1o3 * (-c2 * mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaac = c1o3 * (mxxMyy - c2 * mxxMzz + mxxPyyPzz);
+                    mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+                    mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
                     mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
                     mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
@@ -465,12 +467,12 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    CUMacc = -O4 * (c1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1 - O4) * (CUMacc);
-                    CUMcac = -O4 * (c1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1 - O4) * (CUMcac);
-                    CUMcca = -O4 * (c1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1 - O4) * (CUMcca);
-                    CUMbbc = -O4 * (c1 / omega - c1o2) * Dxy * c1o3 * B + (c1 - O4) * (CUMbbc);
-                    CUMbcb = -O4 * (c1 / omega - c1o2) * Dxz * c1o3 * B + (c1 - O4) * (CUMbcb);
-                    CUMcbb = -O4 * (c1 / omega - c1o2) * Dyz * c1o3 * B + (c1 - O4) * (CUMcbb);
+                    CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+                    CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+                    CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+                    CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc);
+                    CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb);
+                    CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb);
 
                     //////////////////////////////////////////////////////////////////////////
                     //5.
@@ -489,32 +491,32 @@ void CumulantK17LBMKernel::calculate(int step)
 
                     //////////////////////////////////////////////////////////////////////////
                     //4.
-                    mfcbb = CUMcbb + c1o3 * ((c3 * mfcaa + c1) * mfabb + c6 * mfbba * mfbab) * OOrho;
-                    mfbcb = CUMbcb + c1o3 * ((c3 * mfaca + c1) * mfbab + c6 * mfbba * mfabb) * OOrho;
-                    mfbbc = CUMbbc + c1o3 * ((c3 * mfaac + c1) * mfbba + c6 * mfbab * mfabb) * OOrho;
+                    mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
+                    mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
+                    mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
 
-                    mfcca = CUMcca + (((mfcaa * mfaca + c2 * mfbba * mfbba) * c9 + c3 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
-                    mfcac = CUMcac + (((mfcaa * mfaac + c2 * mfbab * mfbab) * c9 + c3 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9;
-                    mfacc = CUMacc + (((mfaac * mfaca + c2 * mfabb * mfabb) * c9 + c3 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
+                    mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
+                    mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9;
+                    mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
 
                     //////////////////////////////////////////////////////////////////////////
                     //5.
-                    mfbcc = CUMbcc + c1o3 * (c3 * (mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho;
-                    mfcbc = CUMcbc + c1o3 * (c3 * (mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho;
-                    mfccb = CUMccb + c1o3 * (c3 * (mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho;
+                    mfbcc = CUMbcc + c1o3 * (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho;
+                    mfcbc = CUMcbc + c1o3 * (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho;
+                    mfccb = CUMccb + c1o3 * (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho;
 
                     //////////////////////////////////////////////////////////////////////////
                     //6.
-                    mfccc = CUMccc - ((-c4 * mfbbb * mfbbb
+                    mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb
                                        - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-                                       - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-                                       - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
-                                      + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-                                         + c2 * (mfcaa * mfaca * mfaac)
-                                         + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho
+                                       - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                                       - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+                                      + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                                         + c2o1 * (mfcaa * mfaca * mfaac)
+                                         + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho
                                       - c1o3 * (mfacc + mfcac + mfcca) * OOrho
                                       - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
-                                      + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                                      + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
                                          + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3
                                       + c1o27 * ((drho * drho - drho) * OOrho * OOrho));
 
@@ -536,49 +538,49 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
                     ////////////////////////////////////////////////////////////////////////////////////
 
                     //////////////////////////////////////////////////////////////////////////
                     //proof correctness
                     //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-                    LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+                    real drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
                                         + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
                                         + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
-                    LBMReal dif = drho - drho_post;
+                    real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                     if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
index aab4d669655efe5bd489feb3829da28e67aa9ecb..345f12fd81f708f13371bffc77a4690dae37d085 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
@@ -55,27 +55,27 @@ public:
     ~CumulantK17LBMKernel() = default;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; }
 
 protected:
-    inline void forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K);
-    inline void backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K);
-    inline void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    inline void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+    inline void forwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K);
+    inline void backwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K);
+    inline void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    inline void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
     virtual void initDataSet();
-    LBMReal f[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr restDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr restDistributions;
 
     mu::value_type muX1, muX2, muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -84,17 +84,18 @@ protected:
 //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
 //! Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
+inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K)
 {
-    using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-    LBMReal m1 = mfc - mfa;
-    LBMReal m0 = m2 + mfb;
+//    using namespace UbMath;
+    using namespace vf::lbm::constant;
+    real m2 = mfa + mfc;
+    real m1 = mfc - mfa;
+    real m0 = m2 + mfb;
     mfa = m0;
     m0 *= Kinverse;
-    m0 += c1;
+    m0 += c1o1;
     mfb = (m1 * Kinverse - m0 * vv) * K;
-    mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 ////////////////////////////////////////////////////////////////////////////////
 //! \brief backward chimera transformation \ref backwardInverseChimeraWithK
@@ -102,12 +103,14 @@ inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBMRe
 //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
 //! ] Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
+inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K)
 {
-    using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 - vv) * c1o2) * K;
-    LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (-v2)) * K;
-    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 + vv) * c1o2) * K;
+//    using namespace UbMath;
+    using namespace vf::lbm::constant;
+ 
+    real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
+    real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;
+    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
     mfa = m0;
     mfb = m1;
 }
@@ -118,12 +121,14 @@ inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LBMR
 //! for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations.
 //! Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
+inline void CumulantK17LBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2)
 {
-    using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-    LBMReal m2 = mfc - mfa;
-    mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+//    using namespace UbMath;
+    using namespace vf::lbm::constant;
+    
+    real m1 = (mfa + mfc) + mfb;
+    real m2 = mfc - mfa;
+    mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
     mfb = m2 - vv * m1;
     mfa = m1;
 }
@@ -134,11 +139,13 @@ inline void CumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBM
 //! for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations.
 //! Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
+inline void CumulantK17LBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2)
 {
-    using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-    LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+//    using namespace UbMath;
+    using namespace vf::lbm::constant;
+
+    real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+    real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
     mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
     mfb = mb;
     mfa = ma;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
index e001cbf970d59f6f847589810eeaacb86a3e22a2..2d39205c544e430aac8c0a4181d2c8e3b4791e7a 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
@@ -46,7 +46,8 @@
 
 //#define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CumulantK17LBMKernelUnified::CumulantK17LBMKernelUnified()
@@ -132,7 +133,7 @@ void CumulantK17LBMKernelUnified::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-    LBMReal omega = collFactor;
+    real omega = collFactor;
 
     for (int x3 = minX3; x3 < maxX3; x3++)
     {
@@ -169,43 +170,43 @@ void CumulantK17LBMKernelUnified::calculate(int step)
                     // a b c
                     //-1 0 1
 
-                    LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-                    LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-                    LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-                    LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-                    LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-                    LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-                    LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-                    LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-                    LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-                    LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-                    LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-                    LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-                    LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-                    LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-                    LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-                    LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-                    LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-                    LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-                    LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-                    LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-                    LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-                    LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-                    LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                    LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                    LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                    LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                    LBMReal mfbbb = (*this->restDistributions)(x1, x2, x3);
+                    real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                    real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                    real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                    real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                    real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                    real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                    real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                    real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                    real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                    real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                    real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                    real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                    real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                    real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                    real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                    real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                    real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                    real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                    real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                    real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                    real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                    real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                    real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                    real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                    real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                    real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                    real mfbbb = (*this->restDistributions)(x1, x2, x3);
 
                     
-                    LBMReal forces[3] = {0., 0., 0.};
+                    real forces[3] = {0., 0., 0.};
                     if (withForcing)
                     {
-                        muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-                        muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-                        muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+                        muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+                        muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+                        muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
                         forcingX1 = muForcingX1.Eval();
                         forcingX2 = muForcingX2.Eval();
@@ -281,10 +282,10 @@ void CumulantK17LBMKernelUnified::calculate(int step)
                     //proof correctness
                     //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-                    LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+                    real drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
                                         + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
                                         + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
-                    LBMReal dif = distribution.getDensity_() - drho_post;
+                    real dif = distribution.getDensity_() - drho_post;
 #ifdef SINGLEPRECISION
                     if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
index 175fdd4cba2a0c8ce47248f5de6672f34dda0cc3..d13e82efce5f5bc9211476ea86d989fe8663ab8c 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
@@ -55,22 +55,22 @@ public:
     ~CumulantK17LBMKernelUnified() = default;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; }
 
 protected:
     virtual void initDataSet();
-    LBMReal f[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr restDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr restDistributions;
 
     mu::value_type muX1, muX2, muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp
index 1ecfc5a4ce6e4106750fad71b9d63ac7e5dd0fc9..375f5c406c179e1e17316e7ff7faa896a5f06b2d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CumulantLBMKernel::CumulantLBMKernel()
@@ -57,7 +58,7 @@ SPtr<LBMKernel> CumulantLBMKernel::clone()
    }
    else
    {
-      dynamicPointerCast<CumulantLBMKernel>(kernel)->OxxPyyPzz = UbMath::one;
+      dynamicPointerCast<CumulantLBMKernel>(kernel)->OxxPyyPzz = c1o1;
    }
    return kernel;
 }
@@ -1028,7 +1029,7 @@ SPtr<LBMKernel> CumulantLBMKernel::clone()
 //   //timer.stop();
 //}
 //////////////////////////////////////////////////////////////////////////
-double CumulantLBMKernel::getCalculationTime()
+real CumulantLBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
@@ -1097,50 +1098,50 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    // a b c
    //-1 0 1
 
-   LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-   LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-   LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-   LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-   LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-   LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-   LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-   LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-   LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-   LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-   LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-   LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-   LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-   LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-   LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-   LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-   LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-   LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-   LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-   LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-   LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-   LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-   LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-   LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-   LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-   LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-   LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-   ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+   real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+   real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+   real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+   real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+   real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+   real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+   real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+   real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+   real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+   real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+   real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+   real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+   real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+   real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+   real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+   real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+   real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+   real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+   real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+   real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+   real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+   real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+   real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+   real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+   real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+   real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+   real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+   ////////////////////////////////////////////////////////////////////////////////////
+   real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
       (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
       ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 
-   LBMReal rho = UbMath::one + drho;
+   real rho = c1o1 + drho;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
       (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
       (mfcbb - mfabb)) / rho;
-   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
       (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
       (mfbcb - mfbab)) / rho;
-   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
       (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
       (mfbbc - mfbba)) / rho;
    ////////////////////////////////////////////////////////////////////////////////////
@@ -1149,9 +1150,9 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ///////////////////////////////////////////////////////////////////////////////////////////
    if (withForcing)
    {
-      muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-      muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-      muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+      muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+      muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+      muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
       forcingX1 = muForcingX1.Eval();
       forcingX2 = muForcingX2.Eval();
@@ -1163,12 +1164,12 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    }
    ///////////////////////////////////////////////////////////////////////////////////////////               
 ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oMdrho = one; // comp special
+   real oMdrho = c1o1; // comp special
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal m0, m1, m2;
-   LBMReal vx2;
-   LBMReal vy2;
-   LBMReal vz2;
+   real m0, m1, m2;
+   real vx2;
+   real vy2;
+   real vz2;
    vx2 = vvx * vvx;
    vy2 = vvy * vvy;
    vz2 = vvz * vvz;
@@ -1192,7 +1193,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaaa = m0;
    m0 += c1o36 * oMdrho;
    mfaab = m1 - m0 * vvz;
-   mfaac = m2 - two * m1 * vvz + vz2 * m0;
+   mfaac = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaba + mfabc;
    m1 = mfabc - mfaba;
@@ -1200,7 +1201,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaba = m0;
    m0 += c1o9 * oMdrho;
    mfabb = m1 - m0 * vvz;
-   mfabc = m2 - two * m1 * vvz + vz2 * m0;
+   mfabc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaca + mfacc;
    m1 = mfacc - mfaca;
@@ -1208,7 +1209,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaca = m0;
    m0 += c1o36 * oMdrho;
    mfacb = m1 - m0 * vvz;
-   mfacc = m2 - two * m1 * vvz + vz2 * m0;
+   mfacc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbaa + mfbac;
@@ -1217,7 +1218,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbaa = m0;
    m0 += c1o9 * oMdrho;
    mfbab = m1 - m0 * vvz;
-   mfbac = m2 - two * m1 * vvz + vz2 * m0;
+   mfbac = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbba + mfbbc;
    m1 = mfbbc - mfbba;
@@ -1225,7 +1226,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbba = m0;
    m0 += c4o9 * oMdrho;
    mfbbb = m1 - m0 * vvz;
-   mfbbc = m2 - two * m1 * vvz + vz2 * m0;
+   mfbbc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbca + mfbcc;
    m1 = mfbcc - mfbca;
@@ -1233,7 +1234,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbca = m0;
    m0 += c1o9 * oMdrho;
    mfbcb = m1 - m0 * vvz;
-   mfbcc = m2 - two * m1 * vvz + vz2 * m0;
+   mfbcc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcaa + mfcac;
@@ -1242,7 +1243,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcaa = m0;
    m0 += c1o36 * oMdrho;
    mfcab = m1 - m0 * vvz;
-   mfcac = m2 - two * m1 * vvz + vz2 * m0;
+   mfcac = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcba + mfcbc;
    m1 = mfcbc - mfcba;
@@ -1250,7 +1251,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcba = m0;
    m0 += c1o9 * oMdrho;
    mfcbb = m1 - m0 * vvz;
-   mfcbc = m2 - two * m1 * vvz + vz2 * m0;
+   mfcbc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcca + mfccc;
    m1 = mfccc - mfcca;
@@ -1258,7 +1259,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcca = m0;
    m0 += c1o36 * oMdrho;
    mfccb = m1 - m0 * vvz;
-   mfccc = m2 - two * m1 * vvz + vz2 * m0;
+   mfccc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -1270,14 +1271,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaaa = m0;
    m0 += c1o6 * oMdrho;
    mfaba = m1 - m0 * vvy;
-   mfaca = m2 - two * m1 * vvy + vy2 * m0;
+   mfaca = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaab + mfacb;
    m1 = mfacb - mfaab;
    m0 = m2 + mfabb;
    mfaab = m0;
    mfabb = m1 - m0 * vvy;
-   mfacb = m2 - two * m1 * vvy + vy2 * m0;
+   mfacb = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaac + mfacc;
    m1 = mfacc - mfaac;
@@ -1285,7 +1286,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaac = m0;
    m0 += c1o18 * oMdrho;
    mfabc = m1 - m0 * vvy;
-   mfacc = m2 - two * m1 * vvy + vy2 * m0;
+   mfacc = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbaa + mfbca;
@@ -1294,14 +1295,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbaa = m0;
    m0 += c2o3 * oMdrho;
    mfbba = m1 - m0 * vvy;
-   mfbca = m2 - two * m1 * vvy + vy2 * m0;
+   mfbca = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbab + mfbcb;
    m1 = mfbcb - mfbab;
    m0 = m2 + mfbbb;
    mfbab = m0;
    mfbbb = m1 - m0 * vvy;
-   mfbcb = m2 - two * m1 * vvy + vy2 * m0;
+   mfbcb = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbac + mfbcc;
    m1 = mfbcc - mfbac;
@@ -1309,7 +1310,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbac = m0;
    m0 += c2o9 * oMdrho;
    mfbbc = m1 - m0 * vvy;
-   mfbcc = m2 - two * m1 * vvy + vy2 * m0;
+   mfbcc = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcaa + mfcca;
@@ -1318,14 +1319,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcaa = m0;
    m0 += c1o6 * oMdrho;
    mfcba = m1 - m0 * vvy;
-   mfcca = m2 - two * m1 * vvy + vy2 * m0;
+   mfcca = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcab + mfccb;
    m1 = mfccb - mfcab;
    m0 = m2 + mfcbb;
    mfcab = m0;
    mfcbb = m1 - m0 * vvy;
-   mfccb = m2 - two * m1 * vvy + vy2 * m0;
+   mfccb = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcac + mfccc;
    m1 = mfccc - mfcac;
@@ -1333,7 +1334,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcac = m0;
    m0 += c1o18 * oMdrho;
    mfcbc = m1 - m0 * vvy;
-   mfccc = m2 - two * m1 * vvy + vy2 * m0;
+   mfccc = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -1343,16 +1344,16 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    m1 = mfcaa - mfaaa;
    m0 = m2 + mfbaa;
    mfaaa = m0;
-   m0 += one * oMdrho;
+   m0 += c1o1 * oMdrho;
    mfbaa = m1 - m0 * vvx;
-   mfcaa = m2 - two * m1 * vvx + vx2 * m0;
+   mfcaa = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaba + mfcba;
    m1 = mfcba - mfaba;
    m0 = m2 + mfbba;
    mfaba = m0;
    mfbba = m1 - m0 * vvx;
-   mfcba = m2 - two * m1 * vvx + vx2 * m0;
+   mfcba = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaca + mfcca;
    m1 = mfcca - mfaca;
@@ -1360,7 +1361,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaca = m0;
    m0 += c1o3 * oMdrho;
    mfbca = m1 - m0 * vvx;
-   mfcca = m2 - two * m1 * vvx + vx2 * m0;
+   mfcca = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaab + mfcab;
@@ -1368,21 +1369,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    m0 = m2 + mfbab;
    mfaab = m0;
    mfbab = m1 - m0 * vvx;
-   mfcab = m2 - two * m1 * vvx + vx2 * m0;
+   mfcab = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfabb + mfcbb;
    m1 = mfcbb - mfabb;
    m0 = m2 + mfbbb;
    mfabb = m0;
    mfbbb = m1 - m0 * vvx;
-   mfcbb = m2 - two * m1 * vvx + vx2 * m0;
+   mfcbb = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfacb + mfccb;
    m1 = mfccb - mfacb;
    m0 = m2 + mfbcb;
    mfacb = m0;
    mfbcb = m1 - m0 * vvx;
-   mfccb = m2 - two * m1 * vvx + vx2 * m0;
+   mfccb = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaac + mfcac;
@@ -1391,14 +1392,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaac = m0;
    m0 += c1o3 * oMdrho;
    mfbac = m1 - m0 * vvx;
-   mfcac = m2 - two * m1 * vvx + vx2 * m0;
+   mfcac = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfabc + mfcbc;
    m1 = mfcbc - mfabc;
    m0 = m2 + mfbbc;
    mfabc = m0;
    mfbbc = m1 - m0 * vvx;
-   mfcbc = m2 - two * m1 * vvx + vx2 * m0;
+   mfcbc = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfacc + mfccc;
    m1 = mfccc - mfacc;
@@ -1406,7 +1407,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfacc = m0;
    m0 += c1o9 * oMdrho;
    mfbcc = m1 - m0 * vvx;
-   mfccc = m2 - two * m1 * vvx + vx2 * m0;
+   mfccc = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
 
@@ -1420,7 +1421,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////
    //3.
    //////////////////////////////
-   LBMReal OxyyPxzz = one;//three  * (two - omega) / (three  - omega);//
+   real OxyyPxzz = c1o1;//three  * (two - omega) / (three  - omega);//
    //LBMReal OxyyMxzz = one;//six    * (two - omega) / (six    - omega);//
    //LBMReal Oxyz = one;//twelve * (two - omega) / (twelve + omega);//
    //////////////////////////////
@@ -1444,47 +1445,47 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////
    //4.
    //////////////////////////////
-   LBMReal O4 = one;
+   real O4 = c1o1;
    //////////////////////////////
-   //LBMReal O4        = omega;//TRT
+   //real O4        = omega;//TRT
    ////////////////////////////////////////////////////////////
    //5.
    //////////////////////////////
-   LBMReal O5 = one;
+   real O5 = c1o1;
    ////////////////////////////////////////////////////////////
    //6.
    //////////////////////////////
-   LBMReal O6 = one;
+   real O6 = c1o1;
    ////////////////////////////////////////////////////////////
 
 
    //central moments to cumulants
    //4.
-   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + two * mfbba * mfbab) / rho;	//ab 15.05.2015 verwendet
-   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + two * mfbba * mfabb) / rho; //ab 15.05.2015 verwendet
-   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + two * mfbab * mfabb) / rho; //ab 15.05.2015 verwendet
+   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;	//ab 15.05.2015 verwendet
+   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho; //ab 15.05.2015 verwendet
+   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho; //ab 15.05.2015 verwendet
 
-   LBMReal CUMcca = mfcca - (((mfcaa * mfaca + two * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));
-   LBMReal CUMcac = mfcac - (((mfcaa * mfaac + two * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));
-   LBMReal CUMacc = mfacc - (((mfaac * mfaca + two * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));
+   real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));
+   real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));
+   real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));
 
    //5.
-   LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + four * mfabb * mfbbb + two * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-   LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + four * mfbab * mfbbb + two * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-   LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + four * mfbba * mfbbb + two * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+   real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+   real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+   real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
 
    //6.
 
-   LBMReal CUMccc = mfccc + ((-four * mfbbb * mfbbb
+   real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb
       - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-      - four * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-      - two * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-      + (four * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-         + two * (mfcaa * mfaca * mfaac)
-         + sixteen * mfbba * mfbab * mfabb) / (rho * rho)
+      - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+      - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+      + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+         + c2o1 * (mfcaa * mfaca * mfaac)
+         + c16o1 * mfbba * mfbab * mfabb) / (rho * rho)
       - c1o3 * (mfacc + mfcac + mfcca) / rho
       - c1o9 * (mfcaa + mfaca + mfaac) / rho
-      + (two * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+      + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
          + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
       + c1o27 * ((drho * drho - drho) / (rho * rho)));
    //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -1494,9 +1495,9 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 
 //2.
 // linear combinations
-   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-   LBMReal mxxMyy = mfcaa - mfaca;
-   LBMReal mxxMzz = mfcaa - mfaac;
+   real mxxPyyPzz = mfcaa + mfaca + mfaac;
+   real mxxMyy = mfcaa - mfaca;
+   real mxxMzz = mfcaa - mfaac;
 
    //////////////////////////////////////////////////////////////////////////
 // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -1536,14 +1537,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
          ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
          //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
    {
-      LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-      LBMReal dyuy = dxux + omega * c3o2 * mxxMyy;
-      LBMReal dzuz = dxux + omega * c3o2 * mxxMzz;
+      real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+      real dyuy = dxux + omega * c3o2 * mxxMyy;
+      real dzuz = dxux + omega * c3o2 * mxxMzz;
 
       //relax
-      mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - three * (one - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-      mxxMyy += omega * (-mxxMyy) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-      mxxMzz += omega * (-mxxMzz) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+      mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+      mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+      mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
       //////////////////////////////////////////////////////////////////////////
       //limiter-Scheise Teil 2
@@ -1573,20 +1574,20 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-   mfaca = c1o3 * (-two * mxxMyy + mxxMzz + mxxPyyPzz);
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz);
+   mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
    //3.
    // linear combinations
 
-   LBMReal mxxyPyzz = mfcba + mfabc;
-   LBMReal mxxyMyzz = mfcba - mfabc;
+   real mxxyPyzz = mfcba + mfabc;
+   real mxxyMyzz = mfcba - mfabc;
 
-   LBMReal mxxzPyyz = mfcab + mfacb;
-   LBMReal mxxzMyyz = mfcab - mfacb;
+   real mxxzPyyz = mfcab + mfacb;
+   real mxxzMyyz = mfcab - mfacb;
 
-   LBMReal mxyyPxzz = mfbca + mfbac;
-   LBMReal mxyyMxzz = mfbca - mfbac;
+   real mxyyPxzz = mfbca + mfbac;
+   real mxyyMxzz = mfbca - mfbac;
 
    //relax
    //////////////////////////////////////////////////////////////////////////
@@ -1664,31 +1665,31 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 
    //back cumulants to central moments
    //4.
-   mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + two * mfbba * mfbab) / rho;
-   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + two * mfbba * mfabb) / rho;
-   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + two * mfbab * mfabb) / rho;
+   mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
+   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
+   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
 
-   mfcca = CUMcca + (((mfcaa * mfaca + two * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
-   mfcac = CUMcac + (((mfcaa * mfaac + two * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));//(one/rho-one));
-   mfacc = CUMacc + (((mfaac * mfaca + two * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
+   mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
+   mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));//(one/rho-one));
+   mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
 
    //5.
-   mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + four * mfabb * mfbbb + two * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-   mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + four * mfbab * mfbbb + two * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-   mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + four * mfbba * mfbbb + two * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+   mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+   mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+   mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
 
    //6.
 
-   mfccc = CUMccc - ((-four * mfbbb * mfbbb
+   mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb
       - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-      - four * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-      - two * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-      + (four * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-         + two * (mfcaa * mfaca * mfaac)
-         + sixteen * mfbba * mfbab * mfabb) / (rho * rho)
+      - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+      - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+      + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+         + c2o1 * (mfcaa * mfaca * mfaac)
+         + c16o1 * mfbba * mfbab * mfabb) / (rho * rho)
       - c1o3 * (mfacc + mfcac + mfcca) / rho
       - c1o9 * (mfcaa + mfaca + mfaac) / rho
-      + (two * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+      + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
          + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
       + c1o27 * ((drho * drho - drho) / (rho * rho)));
    ////////////////////////////////////////////////////////////////////////////////////
@@ -1704,22 +1705,22 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
 ////////////////////////////////////////////////////////////////////////////////////
 // Z - Dir
-   m0 = mfaac * c1o2 + mfaab * (vvz - c1o2) + (mfaaa + one * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfaac - two * mfaab * vvz + mfaaa * (one - vz2) - one * oMdrho * vz2;
-   m2 = mfaac * c1o2 + mfaab * (vvz + c1o2) + (mfaaa + one * oMdrho) * (vz2 + vvz) * c1o2;
+   m0 = mfaac * c1o2 + mfaab * (vvz - c1o2) + (mfaaa + c1o1 * oMdrho) * (vz2 - vvz) * c1o2;
+   m1 = -mfaac - c2o1 * mfaab * vvz + mfaaa * (c1o1 - vz2) - c1o1 * oMdrho * vz2;
+   m2 = mfaac * c1o2 + mfaab * (vvz + c1o2) + (mfaaa + c1o1 * oMdrho) * (vz2 + vvz) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfabc * c1o2 + mfabb * (vvz - c1o2) + mfaba * (vz2 - vvz) * c1o2;
-   m1 = -mfabc - two * mfabb * vvz + mfaba * (one - vz2);
+   m1 = -mfabc - c2o1 * mfabb * vvz + mfaba * (c1o1 - vz2);
    m2 = mfabc * c1o2 + mfabb * (vvz + c1o2) + mfaba * (vz2 + vvz) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfacc - two * mfacb * vvz + mfaca * (one - vz2) - c1o3 * oMdrho * vz2;
+   m1 = -mfacc - c2o1 * mfacb * vvz + mfaca * (c1o1 - vz2) - c1o3 * oMdrho * vz2;
    m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -1727,21 +1728,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfbac * c1o2 + mfbab * (vvz - c1o2) + mfbaa * (vz2 - vvz) * c1o2;
-   m1 = -mfbac - two * mfbab * vvz + mfbaa * (one - vz2);
+   m1 = -mfbac - c2o1 * mfbab * vvz + mfbaa * (c1o1 - vz2);
    m2 = mfbac * c1o2 + mfbab * (vvz + c1o2) + mfbaa * (vz2 + vvz) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbbc * c1o2 + mfbbb * (vvz - c1o2) + mfbba * (vz2 - vvz) * c1o2;
-   m1 = -mfbbc - two * mfbbb * vvz + mfbba * (one - vz2);
+   m1 = -mfbbc - c2o1 * mfbbb * vvz + mfbba * (c1o1 - vz2);
    m2 = mfbbc * c1o2 + mfbbb * (vvz + c1o2) + mfbba * (vz2 + vvz) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbcc * c1o2 + mfbcb * (vvz - c1o2) + mfbca * (vz2 - vvz) * c1o2;
-   m1 = -mfbcc - two * mfbcb * vvz + mfbca * (one - vz2);
+   m1 = -mfbcc - c2o1 * mfbcb * vvz + mfbca * (c1o1 - vz2);
    m2 = mfbcc * c1o2 + mfbcb * (vvz + c1o2) + mfbca * (vz2 + vvz) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -1749,21 +1750,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfcac - two * mfcab * vvz + mfcaa * (one - vz2) - c1o3 * oMdrho * vz2;
+   m1 = -mfcac - c2o1 * mfcab * vvz + mfcaa * (c1o1 - vz2) - c1o3 * oMdrho * vz2;
    m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfcbc * c1o2 + mfcbb * (vvz - c1o2) + mfcba * (vz2 - vvz) * c1o2;
-   m1 = -mfcbc - two * mfcbb * vvz + mfcba * (one - vz2);
+   m1 = -mfcbc - c2o1 * mfcbb * vvz + mfcba * (c1o1 - vz2);
    m2 = mfcbc * c1o2 + mfcbb * (vvz + c1o2) + mfcba * (vz2 + vvz) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfccc * c1o2 + mfccb * (vvz - c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfccc - two * mfccb * vvz + mfcca * (one - vz2) - c1o9 * oMdrho * vz2;
+   m1 = -mfccc - c2o1 * mfccb * vvz + mfcca * (c1o1 - vz2) - c1o9 * oMdrho * vz2;
    m2 = mfccc * c1o2 + mfccb * (vvz + c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 + vvz) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -1774,21 +1775,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 = mfaca * c1o2 + mfaba * (vvy - c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfaca - two * mfaba * vvy + mfaaa * (one - vy2) - c1o6 * oMdrho * vy2;
+   m1 = -mfaca - c2o1 * mfaba * vvy + mfaaa * (c1o1 - vy2) - c1o6 * oMdrho * vy2;
    m2 = mfaca * c1o2 + mfaba * (vvy + c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 + vvy) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfacb * c1o2 + mfabb * (vvy - c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfacb - two * mfabb * vvy + mfaab * (one - vy2) - c2o3 * oMdrho * vy2;
+   m1 = -mfacb - c2o1 * mfabb * vvy + mfaab * (c1o1 - vy2) - c2o3 * oMdrho * vy2;
    m2 = mfacb * c1o2 + mfabb * (vvy + c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 + vvy) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfacc * c1o2 + mfabc * (vvy - c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfacc - two * mfabc * vvy + mfaac * (one - vy2) - c1o6 * oMdrho * vy2;
+   m1 = -mfacc - c2o1 * mfabc * vvy + mfaac * (c1o1 - vy2) - c1o6 * oMdrho * vy2;
    m2 = mfacc * c1o2 + mfabc * (vvy + c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 + vvy) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -1796,21 +1797,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfbca * c1o2 + mfbba * (vvy - c1o2) + mfbaa * (vy2 - vvy) * c1o2;
-   m1 = -mfbca - two * mfbba * vvy + mfbaa * (one - vy2);
+   m1 = -mfbca - c2o1 * mfbba * vvy + mfbaa * (c1o1 - vy2);
    m2 = mfbca * c1o2 + mfbba * (vvy + c1o2) + mfbaa * (vy2 + vvy) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbcb * c1o2 + mfbbb * (vvy - c1o2) + mfbab * (vy2 - vvy) * c1o2;
-   m1 = -mfbcb - two * mfbbb * vvy + mfbab * (one - vy2);
+   m1 = -mfbcb - c2o1 * mfbbb * vvy + mfbab * (c1o1 - vy2);
    m2 = mfbcb * c1o2 + mfbbb * (vvy + c1o2) + mfbab * (vy2 + vvy) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbcc * c1o2 + mfbbc * (vvy - c1o2) + mfbac * (vy2 - vvy) * c1o2;
-   m1 = -mfbcc - two * mfbbc * vvy + mfbac * (one - vy2);
+   m1 = -mfbcc - c2o1 * mfbbc * vvy + mfbac * (c1o1 - vy2);
    m2 = mfbcc * c1o2 + mfbbc * (vvy + c1o2) + mfbac * (vy2 + vvy) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -1818,21 +1819,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcca * c1o2 + mfcba * (vvy - c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfcca - two * mfcba * vvy + mfcaa * (one - vy2) - c1o18 * oMdrho * vy2;
+   m1 = -mfcca - c2o1 * mfcba * vvy + mfcaa * (c1o1 - vy2) - c1o18 * oMdrho * vy2;
    m2 = mfcca * c1o2 + mfcba * (vvy + c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 + vvy) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfccb * c1o2 + mfcbb * (vvy - c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfccb - two * mfcbb * vvy + mfcab * (one - vy2) - c2o9 * oMdrho * vy2;
+   m1 = -mfccb - c2o1 * mfcbb * vvy + mfcab * (c1o1 - vy2) - c2o9 * oMdrho * vy2;
    m2 = mfccb * c1o2 + mfcbb * (vvy + c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 + vvy) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfccc * c1o2 + mfcbc * (vvy - c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfccc - two * mfcbc * vvy + mfcac * (one - vy2) - c1o18 * oMdrho * vy2;
+   m1 = -mfccc - c2o1 * mfcbc * vvy + mfcac * (c1o1 - vy2) - c1o18 * oMdrho * vy2;
    m2 = mfccc * c1o2 + mfcbc * (vvy + c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 + vvy) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -1843,21 +1844,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 = mfcaa * c1o2 + mfbaa * (vvx - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcaa - two * mfbaa * vvx + mfaaa * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfcaa - c2o1 * mfbaa * vvx + mfaaa * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfcaa * c1o2 + mfbaa * (vvx + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcba * c1o2 + mfbba * (vvx - c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcba - two * mfbba * vvx + mfaba * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfcba - c2o1 * mfbba * vvx + mfaba * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfcba * c1o2 + mfbba * (vvx + c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcca * c1o2 + mfbca * (vvx - c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcca - two * mfbca * vvx + mfaca * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfcca - c2o1 * mfbca * vvx + mfaca * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfcca * c1o2 + mfbca * (vvx + c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -1865,21 +1866,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcab * c1o2 + mfbab * (vvx - c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcab - two * mfbab * vvx + mfaab * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfcab - c2o1 * mfbab * vvx + mfaab * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfcab * c1o2 + mfbab * (vvx + c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 = mfcbb * c1o2 + mfbbb * (vvx - c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcbb - two * mfbbb * vvx + mfabb * (one - vx2) - c4o9 * oMdrho * vx2;
+   m1 = -mfcbb - c2o1 * mfbbb * vvx + mfabb * (c1o1 - vx2) - c4o9 * oMdrho * vx2;
    m2 = mfcbb * c1o2 + mfbbb * (vvx + c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 = mfccb * c1o2 + mfbcb * (vvx - c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfccb - two * mfbcb * vvx + mfacb * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfccb - c2o1 * mfbcb * vvx + mfacb * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfccb * c1o2 + mfbcb * (vvx + c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -1887,21 +1888,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcac * c1o2 + mfbac * (vvx - c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcac - two * mfbac * vvx + mfaac * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfcac - c2o1 * mfbac * vvx + mfaac * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfcac * c1o2 + mfbac * (vvx + c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 = mfcbc * c1o2 + mfbbc * (vvx - c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcbc - two * mfbbc * vvx + mfabc * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfcbc - c2o1 * mfbbc * vvx + mfabc * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfcbc * c1o2 + mfbbc * (vvx + c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 = mfccc * c1o2 + mfbcc * (vvx - c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfccc - two * mfbcc * vvx + mfacc * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfccc - c2o1 * mfbcc * vvx + mfacc * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfccc * c1o2 + mfbcc * (vvx + c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -1912,11 +1913,11 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    //proof correctness
    //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-   LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+   real drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
       + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
       + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
    //LBMReal dif = fabs(rho - rho_post);
-   LBMReal dif = drho - drho_post;
+   real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
    if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h
index 995ce63d877d833e7907d6335e609cfa7478aebd..9fb6d834fc8904af84e20798b03ea23b68ad63a4 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h
@@ -21,35 +21,35 @@ public:
    virtual ~CumulantLBMKernel() = default;
    //virtual void calculate(int step);
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    void setBulkOmegaToOmega(bool value);
    void setRelaxationParameter(Parameter p);
 protected:
    void initData() override;
    void nodeCollision(int step, int x1, int x2, int x3) override;
    void initDataSet();
-   LBMReal f[D3Q27System::ENDF + 1];
+   real f[D3Q27System::ENDF + 1];
 
    UbTimer timer;
 
-   LBMReal OxyyMxzz;
+   real OxyyMxzz;
    Parameter parameter;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1, muX2, muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 
    // bulk viscosity
    bool bulkOmegaToOmega;
-   LBMReal OxxPyyPzz;
+   real OxxPyyPzz;
 
-   LBMReal omega;
+   real omega;
 };
 #endif // CumulantLBMKernel_h__
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
index 7e2fd17c0577ea4606f4b38530985411764b5bdb..b7aed29bbf30f3081633df6f45eaf7ade9d93572 100644
--- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
@@ -4,7 +4,8 @@
 
 namespace D3Q27System
 {
-using namespace UbMath;
+//using namespace UbMath;
+    using namespace vf::lbm::constant;
 
 // index            0   1   2   3   4   5  6   7   8   9   10  11  12  13  14  15  16  17   18  19  20  21  22  23  24  25
 // f:               E,  W,  N,  S,  T,  B, NE, SW, SE, NW, TE, BW, BE, TW, TN, BS, BN, TS, TNE TNW TSE TSW BNE BNW BSE BSW
@@ -18,42 +19,42 @@ const int DX1[] = { 0,  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,
 const int DX2[] = { 0,  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   1,  1, -1, -1,  1,  1, -1, -1 };
 const int DX3[] = { 0,  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   1,  1,  1,  1, -1, -1, -1, -1 };
 
-const double WEIGTH[] = { c8o27,  
+const real WEIGTH[] = { c8o27,  
                           c2o27,  c2o27,  c2o27,  c2o27,  c2o27,  c2o27,  
                           c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,
                           c1o216, c1o216, c1o216, c1o216, c1o216, c1o216, c1o216, c1o216 };
 
-const int INVDIR[] = { DIR_000, INV_P00,   INV_M00,   INV_0P0,   INV_0M0,   INV_00P,   INV_00M,   INV_PP0,  INV_MM0, INV_PM0,
-                       INV_MP0,  INV_P0P,  INV_M0M,  INV_P0M,  INV_M0P,  INV_0PP,  INV_0MM,  INV_0PM, INV_0MP,
-                       INV_PPP, INV_MPP, INV_PMP, INV_MMP, INV_PPM, INV_MPM, INV_PMM, INV_MMM };
+const int INVDIR[] = { vf::lbm::dir::DIR_000, vf::lbm::dir::INV_P00,   vf::lbm::dir::INV_M00,  vf::lbm::dir::INV_0P0,  vf::lbm::dir::INV_0M0,   vf::lbm::dir::INV_00P,   vf::lbm::dir::INV_00M,   vf::lbm::dir::INV_PP0,  vf::lbm::dir::INV_MM0, vf::lbm::dir::INV_PM0,
+                       vf::lbm::dir::INV_MP0, vf::lbm::dir::INV_P0P,  vf::lbm::dir::INV_M0M,  vf::lbm::dir::INV_P0M,  vf::lbm::dir::INV_M0P,  vf::lbm::dir::INV_0PP,  vf::lbm::dir::INV_0MM,  vf::lbm::dir::INV_0PM, vf::lbm::dir::INV_0MP,
+                       vf::lbm::dir::INV_PPP, vf::lbm::dir::INV_MPP, vf::lbm::dir::INV_PMP, vf::lbm::dir::INV_MMP, vf::lbm::dir::INV_PPM, vf::lbm::dir::INV_MPM, vf::lbm::dir::INV_PMM, vf::lbm::dir::INV_MMM };
 
 // index             0   1   2   3   4   5  6   7   8    9  10  11  12  13  14  15  16  17  18
 // direction:        E,  W,  N,  S,  T,  B, NE, SW, SE, NW, TE, BW, BE, TW, TN, BS, BN, TS, TNE TNW TSE TSW BNE BNW BSE
 // BSW
-const int EX1[] = { 0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 };
-const int EX2[] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 1, 1, -1, -1, 1, 1, -1, -1 };
-const int EX3[] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1 };
+//const int EX1[] = { 0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 };
+//const int EX2[] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 1, 1, -1, -1, 1, 1, -1, -1 };
+//const int EX3[] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1 };
 
 //////////////////////////////////////////////////////////////////////////
 
 
 
-LBMReal getDensity(const LBMReal *const &f /*[27]*/)
+real getDensity(const real *const &f /*[27]*/)
 {
     return vf::lbm::getDensity(f);
 }
 
-LBMReal getIncompVelocityX1(const LBMReal *const &f /*[27]*/)
+real getIncompVelocityX1(const real *const &f /*[27]*/)
 {
     return vf::lbm::getIncompressibleVelocityX1(f);
 }
 
-LBMReal getIncompVelocityX2(const LBMReal *const &f /*[27]*/)
+real getIncompVelocityX2(const real *const &f /*[27]*/)
 {
     return vf::lbm::getIncompressibleVelocityX2(f);
 }
 
-LBMReal getIncompVelocityX3(const LBMReal *const &f /*[27]*/)
+real getIncompVelocityX3(const real *const &f /*[27]*/)
 {
     return vf::lbm::getIncompressibleVelocityX3(f);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
index d8822219484599e8f31fa86564ecdaf844679d52..4b7f6dcea27a293cb319a65b3a864965cbed01a7 100644
--- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
+++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
@@ -38,9 +38,13 @@
 #include <string>
 #include <iostream>
 
+#include "lbm/constants/D3Q27.h"
 #include "LBMSystem.h"
 #include "UbException.h"
 #include "UbMath.h"
+#include "lbm/constants/NumericConstants.h"
+
+//using namespace vf::lbm::dir;
 
 //! \brief namespace for global system-functions
 namespace D3Q27System
@@ -53,15 +57,15 @@ static const int FENDDIR   = 26; // D3Q27
 static const int STARTF = 0;
 static const int ENDF   = 26; // D3Q27
 
-static const int STARTDIR = 1; //0
+//static const int STARTDIR = 1; //0
 static const int ENDDIR   = 26;//26 // all geometric directions
 
 extern const int DX1[ENDDIR + 1];
 extern const int DX2[ENDDIR + 1];
 extern const int DX3[ENDDIR + 1];
-extern const double WEIGTH[ENDDIR + 1];
+extern const real WEIGTH[ENDDIR + 1];
 
-extern const double cNorm[3][ENDDIR];
+extern const real cNorm[3][ENDDIR];
 
 static const int MINLEVEL = 0;
 static const int MAXLEVEL = 25;
@@ -126,60 +130,60 @@ extern const int EX3[ENDDIR + 1];
 //static constexpr int BSE = 25;
 //static constexpr int BSW = 26;
 
-static constexpr int DIR_000 = 0;
-static constexpr int DIR_P00 = 1;
-static constexpr int DIR_M00 = 2;
-static constexpr int DIR_0P0 = 3;
-static constexpr int DIR_0M0 = 4;
-static constexpr int DIR_00P = 5;
-static constexpr int DIR_00M = 6;
-static constexpr int DIR_PP0 = 7;
-static constexpr int DIR_MM0 = 8;
-static constexpr int DIR_PM0 = 9;
-static constexpr int DIR_MP0 = 10;
-static constexpr int DIR_P0P = 11;
-static constexpr int DIR_M0M = 12;
-static constexpr int DIR_P0M = 13;
-static constexpr int DIR_M0P = 14;
-static constexpr int DIR_0PP = 15;
-static constexpr int DIR_0MM = 16;
-static constexpr int DIR_0PM = 17;
-static constexpr int DIR_0MP = 18;
-static constexpr int DIR_PPP = 19;
-static constexpr int DIR_MPP = 20;
-static constexpr int DIR_PMP = 21;
-static constexpr int DIR_MMP = 22;
-static constexpr int DIR_PPM = 23;
-static constexpr int DIR_MPM = 24;
-static constexpr int DIR_PMM = 25;
-static constexpr int DIR_MMM = 26;
-
-static constexpr int INV_P00 = DIR_M00;
-static constexpr int INV_M00 = DIR_P00;
-static constexpr int INV_0P0 = DIR_0M0;
-static constexpr int INV_0M0 = DIR_0P0;
-static constexpr int INV_00P = DIR_00M;
-static constexpr int INV_00M = DIR_00P;
-static constexpr int INV_PP0 = DIR_MM0;
-static constexpr int INV_MM0 = DIR_PP0;
-static constexpr int INV_PM0 = DIR_MP0;
-static constexpr int INV_MP0 = DIR_PM0;
-static constexpr int INV_P0P = DIR_M0M;
-static constexpr int INV_M0M = DIR_P0P;
-static constexpr int INV_P0M = DIR_M0P;
-static constexpr int INV_M0P = DIR_P0M;
-static constexpr int INV_0PP = DIR_0MM;
-static constexpr int INV_0MM = DIR_0PP;
-static constexpr int INV_0PM = DIR_0MP;
-static constexpr int INV_0MP = DIR_0PM;
-static constexpr int INV_PPP = DIR_MMM;
-static constexpr int INV_MPP = DIR_PMM;
-static constexpr int INV_PMP = DIR_MPM;
-static constexpr int INV_MMP = DIR_PPM;
-static constexpr int INV_PPM = DIR_MMP;
-static constexpr int INV_MPM = DIR_PMP;
-static constexpr int INV_PMM = DIR_MPP;
-static constexpr int INV_MMM = DIR_PPP;
+//static constexpr int DIR_000 = 0;
+//static constexpr int DIR_P00 = 1;
+//static constexpr int DIR_M00 = 2;
+//static constexpr int DIR_0P0 = 3;
+//static constexpr int DIR_0M0 = 4;
+//static constexpr int DIR_00P = 5;
+//static constexpr int DIR_00M = 6;
+//static constexpr int DIR_PP0 = 7;
+//static constexpr int DIR_MM0 = 8;
+//static constexpr int DIR_PM0 = 9;
+//static constexpr int DIR_MP0 = 10;
+//static constexpr int DIR_P0P = 11;
+//static constexpr int DIR_M0M = 12;
+//static constexpr int DIR_P0M = 13;
+//static constexpr int DIR_M0P = 14;
+//static constexpr int DIR_0PP = 15;
+//static constexpr int DIR_0MM = 16;
+//static constexpr int DIR_0PM = 17;
+//static constexpr int DIR_0MP = 18;
+//static constexpr int DIR_PPP = 19;
+//static constexpr int DIR_MPP = 20;
+//static constexpr int DIR_PMP = 21;
+//static constexpr int DIR_MMP = 22;
+//static constexpr int DIR_PPM = 23;
+//static constexpr int DIR_MPM = 24;
+//static constexpr int DIR_PMM = 25;
+//static constexpr int DIR_MMM = 26;
+
+//static constexpr int INV_P00 = DIR_M00;
+//static constexpr int INV_M00 = DIR_P00;
+//static constexpr int INV_0P0 = DIR_0M0;
+//static constexpr int INV_0M0 = DIR_0P0;
+//static constexpr int INV_00P = DIR_00M;
+//static constexpr int INV_00M = DIR_00P;
+//static constexpr int INV_PP0 = DIR_MM0;
+//static constexpr int INV_MM0 = DIR_PP0;
+//static constexpr int INV_PM0 = DIR_MP0;
+//static constexpr int INV_MP0 = DIR_PM0;
+//static constexpr int INV_P0P = DIR_M0M;
+//static constexpr int INV_M0M = DIR_P0P;
+//static constexpr int INV_P0M = DIR_M0P;
+//static constexpr int INV_M0P = DIR_P0M;
+//static constexpr int INV_0PP = DIR_0MM;
+//static constexpr int INV_0MM = DIR_0PP;
+//static constexpr int INV_0PM = DIR_0MP;
+//static constexpr int INV_0MP = DIR_0PM;
+//static constexpr int INV_PPP = DIR_MMM;
+//static constexpr int INV_MPP = DIR_PMM;
+//static constexpr int INV_PMP = DIR_MPM;
+//static constexpr int INV_MMP = DIR_PPM;
+//static constexpr int INV_PPM = DIR_MMP;
+//static constexpr int INV_MPM = DIR_PMP;
+//static constexpr int INV_PMM = DIR_MPP;
+//static constexpr int INV_MMM = DIR_PPP;
 
 extern const int INVDIR[ENDDIR + 1];
 
@@ -213,6 +217,8 @@ static const int ET_BNE = 12;
 //////////////////////////////////////////////////////////////////////////
 inline std::string getDirectionString(int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
         case DIR_P00:
             return "E";
@@ -273,109 +279,111 @@ inline std::string getDirectionString(int direction)
 //////////////////////////////////////////////////////////////////////////
 static inline void setNeighborCoordinatesForDirection(int &x1, int &x2, int &x3, const int &direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             x1++;
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             x2++;
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             x3++;
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             x1--;
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             x2--;
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             x3--;
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             x1++;
             x2++;
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             x1--;
             x2++;
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             x1--;
             x2--;
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             x1++;
             x2--;
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             x1++;
             x3++;
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             x1--;
             x3--;
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             x1++;
             x3--;
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             x1--;
             x3++;
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             x2++;
             x3++;
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             x2--;
             x3--;
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             x2++;
             x3--;
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             x2--;
             x3++;
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             x1++;
             x2++;
             x3++;
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             x1--;
             x2++;
             x3++;
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             x1++;
             x2--;
             x3++;
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             x1--;
             x2--;
             x3++;
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             x1++;
             x2++;
             x3--;
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             x1--;
             x2++;
             x3--;
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             x1++;
             x2--;
             x3--;
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             x1--;
             x2--;
             x3--;
@@ -388,88 +396,108 @@ static inline void setNeighborCoordinatesForDirection(int &x1, int &x2, int &x3,
 //////////////////////////////////////////////////////////////////////////
 // MACROSCOPIC VALUES
 /*=====================================================================*/
-LBMReal getDensity(const LBMReal *const &f /*[27]*/);
+real getDensity(const real *const &f /*[27]*/);
 /*=====================================================================*/
-static LBMReal getPressure(const LBMReal *const &f /*[27]*/) { return REAL_CAST(UbMath::c1o3) * getDensity(f); }
+static real getPressure(const real *const &f /*[27]*/) { return REAL_CAST(vf::lbm::constant::c1o3) * getDensity(f); } // pressure = c1o3 * getDensity(f)
 /*=====================================================================*/
-LBMReal getIncompVelocityX1(const LBMReal *const &f /*[27]*/);
+real getIncompVelocityX1(const real *const &f /*[27]*/);
 /*=====================================================================*/
-LBMReal getIncompVelocityX2(const LBMReal *const &f /*[27]*/);
+real getIncompVelocityX2(const real *const &f /*[27]*/);
 /*=====================================================================*/
-LBMReal getIncompVelocityX3(const LBMReal *const &f /*[27]*/);
+real getIncompVelocityX3(const real *const &f /*[27]*/);
 
 
 /*=====================================================================*/
-static void calcDensity(const LBMReal *const &f /*[27]*/, LBMReal &rho)
+static void calcDensity(const real *const &f /*[27]*/, real &rho) // rho = sum of all 27 entries of f
 {
+    using namespace vf::lbm::dir;
+
     rho = ((f[DIR_PPP] + f[DIR_MMM]) + (f[DIR_PMP] + f[DIR_MPM])) + ((f[DIR_PMM] + f[DIR_MPP]) + (f[DIR_MMP] + f[DIR_PPM])) +
           (((f[DIR_PP0] + f[DIR_MM0]) + (f[DIR_PM0] + f[DIR_MP0])) + ((f[DIR_P0P] + f[DIR_M0M]) + (f[DIR_P0M] + f[DIR_M0P])) +
            ((f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM]))) +
           ((f[DIR_P00] + f[DIR_M00]) + (f[DIR_0P0] + f[DIR_0M0]) + (f[DIR_00P] + f[DIR_00M])) + f[DIR_000];
 }
 /*=====================================================================*/
-static void calcIncompVelocityX1(const LBMReal *const &f /*[27]*/, LBMReal &vx1)
+static void calcIncompVelocityX1(const real *const &f /*[27]*/, real &vx1) // vx1 = sum(f[DIR_Pxx]) - sum(f[DIR_Mxx]); not divided by density
 {
+    using namespace vf::lbm::dir;
+
     vx1 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_PMM] - f[DIR_MPP]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_P0M] - f[DIR_M0P]) + (f[DIR_P0P] - f[DIR_M0M])) + ((f[DIR_PM0] - f[DIR_MP0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_P00] - f[DIR_M00]));
 }
 /*=====================================================================*/
-static void calcIncompVelocityX2(const LBMReal *const &f /*[27]*/, LBMReal &vx2)
+static void calcIncompVelocityX2(const real *const &f /*[27]*/, real &vx2) // vx2 = sum(f[DIR_xPx]) - sum(f[DIR_xMx]); not divided by density
 {
+    using namespace vf::lbm::dir;
+
     vx2 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_MPM] - f[DIR_PMP])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_0PM] - f[DIR_0MP]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_MP0] - f[DIR_PM0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_0P0] - f[DIR_0M0]));
 }
 /*=====================================================================*/
-static void calcIncompVelocityX3(const LBMReal *const &f /*[27]*/, LBMReal &vx3)
+static void calcIncompVelocityX3(const real *const &f /*[27]*/, real &vx3) // vx3 = sum(f[DIR_xxP]) - sum(f[DIR_xxM]); not divided by density
 {
+    using namespace vf::lbm::dir;
+
     vx3 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_MMP] - f[DIR_PPM]))) +
            (((f[DIR_0MP] - f[DIR_0PM]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_M0P] - f[DIR_P0M]) + (f[DIR_P0P] - f[DIR_M0M]))) + (f[DIR_00P] - f[DIR_00M]));
 }
 /*=====================================================================*/
-static LBMReal getCompVelocityX1(const LBMReal *const &f /*[27]*/)
+static real getCompVelocityX1(const real *const &f /*[27]*/) // returns (sum(f[DIR_Pxx]) - sum(f[DIR_Mxx])) / getDensity(f)
 {
+    using namespace vf::lbm::dir;
+
     return ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_PMM] - f[DIR_MPP]) + (f[DIR_PPM] - f[DIR_MMP]))) +
             (((f[DIR_P0M] - f[DIR_M0P]) + (f[DIR_P0P] - f[DIR_M0M])) + ((f[DIR_PM0] - f[DIR_MP0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_P00] - f[DIR_M00])) /
            getDensity(f);
 }
 /*=====================================================================*/
-static LBMReal getCompVelocityX2(const LBMReal *const &f /*[27]*/)
+static real getCompVelocityX2(const real *const &f /*[27]*/) // returns (sum(f[DIR_xPx]) - sum(f[DIR_xMx])) / getDensity(f)
 {
+    using namespace vf::lbm::dir;
+
     return ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_MPM] - f[DIR_PMP])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_PPM] - f[DIR_MMP]))) +
             (((f[DIR_0PM] - f[DIR_0MP]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_MP0] - f[DIR_PM0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_0P0] - f[DIR_0M0])) /
            getDensity(f);
 }
 /*=====================================================================*/
-static LBMReal getCompVelocityX3(const LBMReal *const &f /*[27]*/)
+static real getCompVelocityX3(const real *const &f /*[27]*/) // returns (sum(f[DIR_xxP]) - sum(f[DIR_xxM])) / getDensity(f)
 {
+    using namespace vf::lbm::dir;
+
     return ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_MMP] - f[DIR_PPM]))) +
             (((f[DIR_0MP] - f[DIR_0PM]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_M0P] - f[DIR_P0M]) + (f[DIR_P0P] - f[DIR_M0M]))) + (f[DIR_00P] - f[DIR_00M])) /
            getDensity(f);
 }
 /*=====================================================================*/
-static void calcCompVelocityX1(const LBMReal *const &f /*[27]*/, LBMReal &vx1)
+static void calcCompVelocityX1(const real *const &f /*[27]*/, real &vx1) // vx1 = (sum(f[DIR_Pxx]) - sum(f[DIR_Mxx])) / getDensity(f)
 {
+    using namespace vf::lbm::dir;
+
     vx1 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_PMM] - f[DIR_MPP]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_P0M] - f[DIR_M0P]) + (f[DIR_P0P] - f[DIR_M0M])) + ((f[DIR_PM0] - f[DIR_MP0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_P00] - f[DIR_M00])) /
           getDensity(f);
 }
 /*=====================================================================*/
-static void calcCompVelocityX2(const LBMReal *const &f /*[27]*/, LBMReal &vx2)
+static void calcCompVelocityX2(const real *const &f /*[27]*/, real &vx2) // vx2 = (sum(f[DIR_xPx]) - sum(f[DIR_xMx])) / getDensity(f)
 {
+    using namespace vf::lbm::dir;
+
     vx2 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_MPM] - f[DIR_PMP])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_0PM] - f[DIR_0MP]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_MP0] - f[DIR_PM0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_0P0] - f[DIR_0M0])) /
           getDensity(f);
 }
 /*=====================================================================*/
-static void calcCompVelocityX3(const LBMReal *const &f /*[27]*/, LBMReal &vx3)
+static void calcCompVelocityX3(const real *const &f /*[27]*/, real &vx3) // vx3 = (sum(f[DIR_xxP]) - sum(f[DIR_xxM])) / getDensity(f)
 {
+    using namespace vf::lbm::dir;
+
     vx3 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_MMP] - f[DIR_PPM]))) +
            (((f[DIR_0MP] - f[DIR_0PM]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_M0P] - f[DIR_P0M]) + (f[DIR_P0P] - f[DIR_M0M]))) + (f[DIR_00P] - f[DIR_00M])) /
           getDensity(f);
 }
 /*=====================================================================*/
-static void calcIncompMacroscopicValues(const LBMReal *const &f /*[27]*/, LBMReal &rho, LBMReal &vx1, LBMReal &vx2,
-                                        LBMReal &vx3)
+static void calcIncompMacroscopicValues(const real *const &f /*[27]*/, real &rho, real &vx1, real &vx2,
+                                        real &vx3)
 {
     D3Q27System::calcDensity(f, rho);
     D3Q27System::calcIncompVelocityX1(f, vx1);
@@ -478,342 +506,353 @@ static void calcIncompMacroscopicValues(const LBMReal *const &f /*[27]*/, LBMRea
 }
 
 /*=====================================================================*/
-static void calcCompMacroscopicValues(const LBMReal *const &f /*[27]*/, LBMReal &drho, LBMReal &vx1, LBMReal &vx2,
-                                      LBMReal &vx3)
+static void calcCompMacroscopicValues(const real *const &f /*[27]*/, real &drho, real &vx1, real &vx2,
+                                      real &vx3)
 {
     D3Q27System::calcDensity(f, drho);
     D3Q27System::calcIncompVelocityX1(f, vx1);
     D3Q27System::calcIncompVelocityX2(f, vx2);
     D3Q27System::calcIncompVelocityX3(f, vx3);
-    LBMReal rho = drho + UbMath::one;
+    // The velocities are normalised by rho = drho + c1o1, not by drho itself.
+    real rho = drho + vf::lbm::constant::c1o1;
     vx1 /= rho;
     vx2 /= rho;
     vx3 /= rho;
 }
 //////////////////////////////////////////////////////////////////////////
-static LBMReal getCompFeqForDirection(const int &direction, const LBMReal &drho, const LBMReal &vx1, const LBMReal &vx2,
-                                      const LBMReal &vx3)
+static real getCompFeqForDirection(const int &direction, const real &drho, const real &vx1, const real &vx2,
+                                   const real &vx3) // returns feq for one direction; throws UbException for an unknown one
 {
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
-    LBMReal rho   = drho + UbMath::one;
+    using namespace vf::lbm::dir;
+
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); // 1.5 times the squared velocity magnitude
+    real rho   = drho + vf::lbm::constant::c1o1; // rho multiplies the velocity terms, drho enters additively
     switch (direction) {
         case DIR_000:
-            return REAL_CAST(UbMath::c8o27 * (drho + rho * (-cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c8o27 * (drho + rho * (-cu_sq)));
         case DIR_P00:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (vx1) + vf::lbm::constant::c9o2 * (vx1) * (vx1)-cu_sq)));
         case DIR_M00:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (-vx1) + vf::lbm::constant::c9o2 * (-vx1) * (-vx1) - cu_sq)));
         case DIR_0P0:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (vx2) + vf::lbm::constant::c9o2 * (vx2) * (vx2)-cu_sq)));
         case DIR_0M0:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (-vx2) + vf::lbm::constant::c9o2 * (-vx2) * (-vx2) - cu_sq)));
         case DIR_00P:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (vx3) + vf::lbm::constant::c9o2 * (vx3) * (vx3)-cu_sq)));
         case DIR_00M:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (-vx3) + vf::lbm::constant::c9o2 * (-vx3) * (-vx3) - cu_sq)));
         case DIR_PP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 + vx2) + vf::lbm::constant::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq)));
         case DIR_MM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 - vx2) + vf::lbm::constant::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq)));
         case DIR_PM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 - vx2) + vf::lbm::constant::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq)));
         case DIR_MP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 + vx2) + vf::lbm::constant::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq)));
         case DIR_P0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 + vx3) + vf::lbm::constant::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq)));
         case DIR_M0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 - vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq)));
         case DIR_P0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 - vx3) + vf::lbm::constant::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq)));
         case DIR_M0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 + vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq)));
         case DIR_0PP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx2 + vx3) + vf::lbm::constant::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq)));
         case DIR_0MM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq)));
         case DIR_0PM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx2 - vx3) + vf::lbm::constant::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq)));
         case DIR_0MP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq)));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq)));
         case DIR_PPP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 + vx2 + vx3) +
-                                            UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq)));
         case DIR_MMM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 - vx2 - vx3) +
-                                            UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq)));
         case DIR_PPM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 + vx2 - vx3) +
-                                            UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq)));
         case DIR_MMP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 - vx2 + vx3) +
-                                            UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq)));
         case DIR_PMP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 - vx2 + vx3) +
-                                            UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq)));
         case DIR_MPM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 + vx2 - vx3) +
-                                            UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq)));
         case DIR_PMM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 - vx2 - vx3) +
-                                            UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq)));
         case DIR_MPP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::lbm::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 + vx2 + vx3) +
-                                            UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq)));
+                                            vf::lbm::constant::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq)));
         default:
             throw UbException(UB_EXARGS, "unknown dir");
     }
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcCompFeq(LBMReal *const &feq /*[27]*/, const LBMReal &drho, const LBMReal &vx1, const LBMReal &vx2,
-                        const LBMReal &vx3)
+static void calcCompFeq(real *const &feq /*[27]*/, const real &drho, const real &vx1, const real &vx2,
+                        const real &vx3) // fills all 27 entries of feq from drho and the velocity (vx1, vx2, vx3)
 {
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
-    LBMReal rho   = drho + UbMath::one;
-
-    feq[DIR_000] = UbMath::c8o27 * (drho + rho * (-cu_sq));
-    feq[DIR_P00]    = UbMath::c2o27 * (drho + rho * (3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq));
-    feq[DIR_M00]    = UbMath::c2o27 * (drho + rho * (3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq));
-    feq[DIR_0P0]    = UbMath::c2o27 * (drho + rho * (3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq));
-    feq[DIR_0M0]    = UbMath::c2o27 * (drho + rho * (3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq));
-    feq[DIR_00P]    = UbMath::c2o27 * (drho + rho * (3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq));
-    feq[DIR_00M]    = UbMath::c2o27 * (drho + rho * (3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq));
-    feq[DIR_PP0]   = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
-    feq[DIR_MM0]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
-    feq[DIR_PM0]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
-    feq[DIR_MP0]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
-    feq[DIR_P0P]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
-    feq[DIR_M0M]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
-    feq[DIR_P0M]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
-    feq[DIR_M0P]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
-    feq[DIR_0PP]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
-    feq[DIR_0MM]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
-    feq[DIR_0PM]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
-    feq[DIR_0MP]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
-    feq[DIR_PPP] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 + vx2 + vx3) + UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
+    using namespace vf::lbm::dir;
+
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); // 1.5 times the squared velocity magnitude
+    real rho   = drho + vf::lbm::constant::c1o1; // rho multiplies the velocity terms, drho enters additively
+
+    feq[DIR_000] = vf::lbm::constant::c8o27 * (drho + rho * (-cu_sq));
+    feq[DIR_P00]    = vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (vx1) + vf::lbm::constant::c9o2 * (vx1) * (vx1)-cu_sq));
+    feq[DIR_M00]    = vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (-vx1) + vf::lbm::constant::c9o2 * (-vx1) * (-vx1) - cu_sq));
+    feq[DIR_0P0]    = vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (vx2) + vf::lbm::constant::c9o2 * (vx2) * (vx2)-cu_sq));
+    feq[DIR_0M0]    = vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (-vx2) + vf::lbm::constant::c9o2 * (-vx2) * (-vx2) - cu_sq));
+    feq[DIR_00P]    = vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (vx3) + vf::lbm::constant::c9o2 * (vx3) * (vx3)-cu_sq));
+    feq[DIR_00M]    = vf::lbm::constant::c2o27 * (drho + rho * (3.0 * (-vx3) + vf::lbm::constant::c9o2 * (-vx3) * (-vx3) - cu_sq));
+    feq[DIR_PP0]   = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (vx1 + vx2) + vf::lbm::constant::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
+    feq[DIR_MM0]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (-vx1 - vx2) + vf::lbm::constant::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
+    feq[DIR_PM0]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (vx1 - vx2) + vf::lbm::constant::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
+    feq[DIR_MP0]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (-vx1 + vx2) + vf::lbm::constant::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
+    feq[DIR_P0P]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (vx1 + vx3) + vf::lbm::constant::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
+    feq[DIR_M0M]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (-vx1 - vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
+    feq[DIR_P0M]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (vx1 - vx3) + vf::lbm::constant::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
+    feq[DIR_M0P]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (-vx1 + vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
+    feq[DIR_0PP]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (vx2 + vx3) + vf::lbm::constant::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
+    feq[DIR_0MM]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (-vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
+    feq[DIR_0PM]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (vx2 - vx3) + vf::lbm::constant::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
+    feq[DIR_0MP]  = vf::lbm::constant::c1o54 * (drho + rho * (3.0 * (-vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
+    feq[DIR_PPP] = vf::lbm::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 + vx2 + vx3) + vf::lbm::constant::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
     feq[DIR_MMM] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 - vx2 - vx3) + UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
-    feq[DIR_PPM] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 + vx2 - vx3) + UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
+        vf::lbm::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 - vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
+    feq[DIR_PPM] = vf::lbm::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 + vx2 - vx3) + vf::lbm::constant::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
     feq[DIR_MMP] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 - vx2 + vx3) + UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
-    feq[DIR_PMP] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 - vx2 + vx3) + UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
+        vf::lbm::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 - vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
+    feq[DIR_PMP] = vf::lbm::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 - vx2 + vx3) + vf::lbm::constant::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
     feq[DIR_MPM] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 + vx2 - vx3) + UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
-    feq[DIR_PMM] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 - vx2 - vx3) + UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
+        vf::lbm::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 + vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
+    feq[DIR_PMM] = vf::lbm::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 - vx2 - vx3) + vf::lbm::constant::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
     feq[DIR_MPP] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 + vx2 + vx3) + UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
+        vf::lbm::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 + vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
 }
 //////////////////////////////////////////////////////////////////////////
-static LBMReal getIncompFeqForDirection(const int &direction, const LBMReal &drho, const LBMReal &vx1,
-                                        const LBMReal &vx2, const LBMReal &vx3)
+static real getIncompFeqForDirection(const int &direction, const real &drho, const real &vx1,
+                                        const real &vx2, const real &vx3)
 {
-    LBMReal cu_sq = 1.5f * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+
+    real cu_sq = 1.5f * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     switch (direction) {
         case DIR_000:
-            return REAL_CAST(UbMath::c8o27 * (drho - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c8o27 * (drho - cu_sq));
         case DIR_P00:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + 3.0 * (vx1) + vf::lbm::constant::c9o2 * (vx1) * (vx1)-cu_sq));
         case DIR_M00:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + 3.0 * (-vx1) + vf::lbm::constant::c9o2 * (-vx1) * (-vx1) - cu_sq));
         case DIR_0P0:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + 3.0 * (vx2) + vf::lbm::constant::c9o2 * (vx2) * (vx2)-cu_sq));
         case DIR_0M0:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + 3.0 * (-vx2) + vf::lbm::constant::c9o2 * (-vx2) * (-vx2) - cu_sq));
         case DIR_00P:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + 3.0 * (vx3) + vf::lbm::constant::c9o2 * (vx3) * (vx3)-cu_sq));
         case DIR_00M:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c2o27 * (drho + 3.0 * (-vx3) + vf::lbm::constant::c9o2 * (-vx3) * (-vx3) - cu_sq));
         case DIR_PP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (vx1 + vx2) + vf::lbm::constant::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
         case DIR_MM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 - vx2) + vf::lbm::constant::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
         case DIR_PM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (vx1 - vx2) + vf::lbm::constant::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
         case DIR_MP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 + vx2) + vf::lbm::constant::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
         case DIR_P0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (vx1 + vx3) + vf::lbm::constant::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
         case DIR_M0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 - vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
         case DIR_P0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (vx1 - vx3) + vf::lbm::constant::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
         case DIR_M0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 + vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
         case DIR_0PP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (vx2 + vx3) + vf::lbm::constant::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
         case DIR_0MM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (-vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
         case DIR_0PM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (vx2 - vx3) + vf::lbm::constant::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
         case DIR_0MP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o54 *
+                             (drho + 3.0 * (-vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
         case DIR_PPP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 + vx2 + vx3) +
-                                               UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (vx1 + vx2 + vx3) +
+                                               vf::lbm::constant::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
         case DIR_MMM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 - vx2 - vx3) +
-                                               UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (-vx1 - vx2 - vx3) +
+                                               vf::lbm::constant::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
         case DIR_PPM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 + vx2 - vx3) +
-                                               UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (vx1 + vx2 - vx3) +
+                                               vf::lbm::constant::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
         case DIR_MMP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 - vx2 + vx3) +
-                                               UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (-vx1 - vx2 + vx3) +
+                                               vf::lbm::constant::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
         case DIR_PMP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 - vx2 + vx3) +
-                                               UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (vx1 - vx2 + vx3) +
+                                               vf::lbm::constant::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
         case DIR_MPM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 + vx2 - vx3) +
-                                               UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (-vx1 + vx2 - vx3) +
+                                               vf::lbm::constant::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
         case DIR_PMM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 - vx2 - vx3) +
-                                               UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (vx1 - vx2 - vx3) +
+                                               vf::lbm::constant::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
         case DIR_MPP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 + vx2 + vx3) +
-                                               UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::lbm::constant::c1o216 * (drho + 3.0 * (-vx1 + vx2 + vx3) +
+                                               vf::lbm::constant::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
         default:
             throw UbException(UB_EXARGS, "unknown dir");
     }
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcIncompFeq(LBMReal *const &feq /*[27]*/, const LBMReal &drho, const LBMReal &vx1, const LBMReal &vx2,
-                          const LBMReal &vx3)
+static void calcIncompFeq(real *const &feq /*[27]*/, const real &drho, const real &vx1, const real &vx2,
+                          const real &vx3)
 {
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
-
-    feq[DIR_000] = UbMath::c8o27 * (drho - cu_sq);
-    feq[DIR_P00]    = UbMath::c2o27 * (drho + 3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq);
-    feq[DIR_M00]    = UbMath::c2o27 * (drho + 3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq);
-    feq[DIR_0P0]    = UbMath::c2o27 * (drho + 3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq);
-    feq[DIR_0M0]    = UbMath::c2o27 * (drho + 3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq);
-    feq[DIR_00P]    = UbMath::c2o27 * (drho + 3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq);
-    feq[DIR_00M]    = UbMath::c2o27 * (drho + 3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq);
-    feq[DIR_PP0]   = UbMath::c1o54 * (drho + 3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq);
-    feq[DIR_MM0]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq);
-    feq[DIR_PM0]   = UbMath::c1o54 * (drho + 3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq);
-    feq[DIR_MP0]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq);
-    feq[DIR_P0P]   = UbMath::c1o54 * (drho + 3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq);
-    feq[DIR_M0M]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq);
-    feq[DIR_P0M]   = UbMath::c1o54 * (drho + 3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq);
-    feq[DIR_M0P]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq);
-    feq[DIR_0PP]   = UbMath::c1o54 * (drho + 3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq);
-    feq[DIR_0MM]   = UbMath::c1o54 * (drho + 3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq);
-    feq[DIR_0PM]   = UbMath::c1o54 * (drho + 3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq);
-    feq[DIR_0MP]   = UbMath::c1o54 * (drho + 3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq);
-    feq[DIR_PPP]  = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 + vx2 + vx3) + UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq);
-    feq[DIR_MMM] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 - vx2 - vx3) + UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
-    feq[DIR_PPM] = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 + vx2 - vx3) + UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq);
-    feq[DIR_MMP] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 - vx2 + vx3) + UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
-    feq[DIR_PMP] = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 - vx2 + vx3) + UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq);
-    feq[DIR_MPM] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 + vx2 - vx3) + UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
-    feq[DIR_PMM] = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 - vx2 - vx3) + UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq);
-    feq[DIR_MPP] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 + vx2 + vx3) + UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
+    using namespace vf::lbm::dir;
+
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+
+    feq[DIR_000] = vf::lbm::constant::c8o27 * (drho - cu_sq);
+    feq[DIR_P00]    = vf::lbm::constant::c2o27 * (drho + 3.0 * (vx1) + vf::lbm::constant::c9o2 * (vx1) * (vx1)-cu_sq);
+    feq[DIR_M00]    = vf::lbm::constant::c2o27 * (drho + 3.0 * (-vx1) + vf::lbm::constant::c9o2 * (-vx1) * (-vx1) - cu_sq);
+    feq[DIR_0P0]    = vf::lbm::constant::c2o27 * (drho + 3.0 * (vx2) + vf::lbm::constant::c9o2 * (vx2) * (vx2)-cu_sq);
+    feq[DIR_0M0]    = vf::lbm::constant::c2o27 * (drho + 3.0 * (-vx2) + vf::lbm::constant::c9o2 * (-vx2) * (-vx2) - cu_sq);
+    feq[DIR_00P]    = vf::lbm::constant::c2o27 * (drho + 3.0 * (vx3) + vf::lbm::constant::c9o2 * (vx3) * (vx3)-cu_sq);
+    feq[DIR_00M]    = vf::lbm::constant::c2o27 * (drho + 3.0 * (-vx3) + vf::lbm::constant::c9o2 * (-vx3) * (-vx3) - cu_sq);
+    feq[DIR_PP0]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (vx1 + vx2) + vf::lbm::constant::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq);
+    feq[DIR_MM0]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (-vx1 - vx2) + vf::lbm::constant::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq);
+    feq[DIR_PM0]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (vx1 - vx2) + vf::lbm::constant::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq);
+    feq[DIR_MP0]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (-vx1 + vx2) + vf::lbm::constant::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq);
+    feq[DIR_P0P]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (vx1 + vx3) + vf::lbm::constant::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq);
+    feq[DIR_M0M]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (-vx1 - vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq);
+    feq[DIR_P0M]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (vx1 - vx3) + vf::lbm::constant::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq);
+    feq[DIR_M0P]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (-vx1 + vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq);
+    feq[DIR_0PP]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (vx2 + vx3) + vf::lbm::constant::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq);
+    feq[DIR_0MM]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (-vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq);
+    feq[DIR_0PM]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (vx2 - vx3) + vf::lbm::constant::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq);
+    feq[DIR_0MP]   = vf::lbm::constant::c1o54 * (drho + 3.0 * (-vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq);
+    feq[DIR_PPP]  = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (vx1 + vx2 + vx3) + vf::lbm::constant::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq);
+    feq[DIR_MMM] = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (-vx1 - vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
+    feq[DIR_PPM] = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (vx1 + vx2 - vx3) + vf::lbm::constant::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq);
+    feq[DIR_MMP] = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (-vx1 - vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
+    feq[DIR_PMP] = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (vx1 - vx2 + vx3) + vf::lbm::constant::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq);
+    feq[DIR_MPM] = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (-vx1 + vx2 - vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
+    feq[DIR_PMM] = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (vx1 - vx2 - vx3) + vf::lbm::constant::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq);
+    feq[DIR_MPP] = vf::lbm::constant::c1o216 *
+               (drho + 3.0 * (-vx1 + vx2 + vx3) + vf::lbm::constant::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
 }
 //////////////////////////////////////////////////////////////////////////
-static inline float getBoundaryVelocityForDirection(const int &direction, const float &bcVelocityX1,
-                                                    const float &bcVelocityX2, const float &bcVelocityX3)
+static inline real getBoundaryVelocityForDirection(const int &direction, const real &bcVelocityX1,
+                                                    const real &bcVelocityX2, const real &bcVelocityX3)
 {
+    using namespace vf::lbm::dir;
+ 
     switch (direction) {
         case DIR_P00:
-            return (float)(UbMath::c4o9 * (+bcVelocityX1));
+            return (real)(vf::lbm::constant::c4o9 * (+bcVelocityX1));
         case DIR_M00:
-            return (float)(UbMath::c4o9 * (-bcVelocityX1));
+            return (real)(vf::lbm::constant::c4o9 * (-bcVelocityX1));
         case DIR_0P0:
-            return (float)(UbMath::c4o9 * (+bcVelocityX2));
+            return (real)(vf::lbm::constant::c4o9 * (+bcVelocityX2));
         case DIR_0M0:
-            return (float)(UbMath::c4o9 * (-bcVelocityX2));
+            return (real)(vf::lbm::constant::c4o9 * (-bcVelocityX2));
         case DIR_00P:
-            return (float)(UbMath::c4o9 * (+bcVelocityX3));
+            return (real)(vf::lbm::constant::c4o9 * (+bcVelocityX3));
         case DIR_00M:
-            return (float)(UbMath::c4o9 * (-bcVelocityX3));
+            return (real)(vf::lbm::constant::c4o9 * (-bcVelocityX3));
         case DIR_PP0:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX2));
+            return (real)(vf::lbm::constant::c1o9 * (+bcVelocityX1 + bcVelocityX2));
         case DIR_MM0:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX2));
+            return (real)(vf::lbm::constant::c1o9 * (-bcVelocityX1 - bcVelocityX2));
         case DIR_PM0:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX2));
+            return (real)(vf::lbm::constant::c1o9 * (+bcVelocityX1 - bcVelocityX2));
         case DIR_MP0:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX2));
+            return (real)(vf::lbm::constant::c1o9 * (-bcVelocityX1 + bcVelocityX2));
         case DIR_P0P:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (+bcVelocityX1 + bcVelocityX3));
         case DIR_M0M:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (-bcVelocityX1 - bcVelocityX3));
         case DIR_P0M:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (+bcVelocityX1 - bcVelocityX3));
         case DIR_M0P:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (-bcVelocityX1 + bcVelocityX3));
         case DIR_0PP:
-            return (float)(UbMath::c1o9 * (+bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (+bcVelocityX2 + bcVelocityX3));
         case DIR_0MM:
-            return (float)(UbMath::c1o9 * (-bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (-bcVelocityX2 - bcVelocityX3));
         case DIR_0PM:
-            return (float)(UbMath::c1o9 * (+bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (+bcVelocityX2 - bcVelocityX3));
         case DIR_0MP:
-            return (float)(UbMath::c1o9 * (-bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o9 * (-bcVelocityX2 + bcVelocityX3));
         case DIR_PPP:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
         case DIR_MMM:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
         case DIR_PPM:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
         case DIR_MMP:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
         case DIR_PMP:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
         case DIR_MPM:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
         case DIR_PMM:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
         case DIR_MPP:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::lbm::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
         default:
             throw UbException(UB_EXARGS, "unknown direction");
     }
@@ -822,8 +861,9 @@ static inline float getBoundaryVelocityForDirection(const int &direction, const
 static const int &getInvertDirection(const int &direction)
 {
 #ifdef _DEBUG
-    if (direction < STARTDIR || direction > ENDDIR)
-        throw UbException(UB_EXARGS, "unknown direction");
+ //   if (direction < STARTDIR || direction > ENDDIR)
+     if (direction < FSTARTDIR || direction > FENDDIR)
+       throw UbException(UB_EXARGS, "unknown direction");
 #endif
     return INVDIR[direction];
 }
@@ -839,48 +879,52 @@ static void getLBMDirections(std::vector<int> &dirs, bool onlyLBdirs = false)
     } else /*STARTDIR->ENDDIR*/
     {
         dirs.resize(ENDDIR + 1);
-        for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+        for (int dir = STARTF; dir <= ENDF; ++dir)
             dirs[dir] = dir;
     }
 }
 //////////////////////////////////////////////////////////////////////////
-static std::vector<int> getEX(const int &exn)
+static std::vector<int> getDX(const int &exn)
 {
     std::vector<int> ex;
     ex.resize(ENDDIR + 1);
     switch (exn) {
         case 1:
-            for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+            for (int dir = FSTARTDIR; dir <= FENDDIR; ++dir)
                 ex[dir] = DX1[dir];
             break;
         case 2:
-            for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+            for (int dir = FSTARTDIR; dir <= FENDDIR; ++dir)
                 ex[dir] = DX2[dir];
             break;
         case 3:
-            for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+            for (int dir = FSTARTDIR; dir <= FENDDIR; ++dir)
                 ex[dir] = DX3[dir];
             break;
     }
     return ex;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline void calcDistanceToNeighbors(std::vector<double> &distNeigh, const double &deltaX1)
+static inline void calcDistanceToNeighbors(std::vector<real> &distNeigh, const real &deltaX1)
 {
+    using namespace vf::lbm::dir;
+
     // distNeigh.resize(FENDDIR+1, UbMath::sqrt2*deltaX1);
 
     distNeigh[DIR_P00] = distNeigh[DIR_M00] = distNeigh[DIR_0P0] = deltaX1;
     distNeigh[DIR_0M0] = distNeigh[DIR_00P] = distNeigh[DIR_00M] = deltaX1;
-    distNeigh[DIR_PP0] = distNeigh[DIR_MP0] = distNeigh[DIR_MM0] = distNeigh[DIR_PM0] = UbMath::sqrt2 * deltaX1;
-    distNeigh[DIR_P0P] = distNeigh[DIR_0PP] = distNeigh[DIR_M0P] = distNeigh[DIR_0MP] = UbMath::sqrt2 * deltaX1;
-    distNeigh[DIR_P0M] = distNeigh[DIR_0PM] = distNeigh[DIR_M0M] = distNeigh[DIR_0MM] = UbMath::sqrt2 * deltaX1;
-    distNeigh[DIR_PPP] = distNeigh[DIR_MPP] = distNeigh[DIR_PMP] = distNeigh[DIR_MMP] = UbMath::sqrt3 * deltaX1;
-    distNeigh[DIR_PPM] = distNeigh[DIR_MPM] = distNeigh[DIR_PMM] = distNeigh[DIR_MMM] = UbMath::sqrt3 * deltaX1;
+    distNeigh[DIR_PP0] = distNeigh[DIR_MP0] = distNeigh[DIR_MM0] = distNeigh[DIR_PM0] = vf::lbm::constant::sqrt2 * deltaX1;
+    distNeigh[DIR_P0P] = distNeigh[DIR_0PP] = distNeigh[DIR_M0P] = distNeigh[DIR_0MP] = vf::lbm::constant::sqrt2 * deltaX1;
+    distNeigh[DIR_P0M] = distNeigh[DIR_0PM] = distNeigh[DIR_M0M] = distNeigh[DIR_0MM] = vf::lbm::constant::sqrt2 * deltaX1;
+    distNeigh[DIR_PPP] = distNeigh[DIR_MPP] = distNeigh[DIR_PMP] = distNeigh[DIR_MMP] = vf::lbm::constant::sqrt3 * deltaX1;
+    distNeigh[DIR_PPM] = distNeigh[DIR_MPM] = distNeigh[DIR_PMM] = distNeigh[DIR_MMM] = vf::lbm::constant::sqrt3 * deltaX1;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline void calcDistanceToNeighbors(std::vector<double> &distNeigh, const double &deltaX1, const double &deltaX2,
-                                           const double &deltaX3)
+static inline void calcDistanceToNeighbors(std::vector<real> &distNeigh, const real &deltaX1, const real &deltaX2,
+                                           const real &deltaX3)
 {
+    using namespace vf::lbm::dir;
+
     // distNeigh.resize(FENDDIR+1, UbMath::sqrt2*deltaX1);
     distNeigh[DIR_P00] = distNeigh[DIR_M00] = deltaX1;
     distNeigh[DIR_0P0] = distNeigh[DIR_0M0] = deltaX2;
@@ -894,11 +938,13 @@ static inline void calcDistanceToNeighbors(std::vector<double> &distNeigh, const
         sqrt(deltaX1 * deltaX1 + deltaX2 * deltaX2 + deltaX3 * deltaX3);
 }
 //////////////////////////////////////////////////////////////////////////
-static inline void initRayVectors(double *const &rayX1, double *const &rayX2, double *const &rayX3)
+static inline void initRayVectors(real *const &rayX1, real *const &rayX2, real *const &rayX3)
 {
+    using namespace vf::lbm::dir;
+
     int fdir;
-    double c1oS2 = UbMath::one_over_sqrt2;
-    double c1oS3 = UbMath::one_over_sqrt3;
+    real c1oS2 = vf::lbm::constant::one_over_sqrt2;
+    real c1oS3 = vf::lbm::constant::one_over_sqrt3;
     fdir         = DIR_P00;
     rayX1[fdir]  = 1.0;
     rayX2[fdir]  = 0.0;
@@ -1005,64 +1051,68 @@ static inline void initRayVectors(double *const &rayX1, double *const &rayX2, do
     rayX3[fdir]  = -c1oS3;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline LBMReal calcPress(const LBMReal *const f, LBMReal rho, LBMReal vx1, LBMReal vx2, LBMReal vx3)
+static inline real calcPress(const real *const f, real rho, real vx1, real vx2, real vx3)
 {
-    LBMReal op = 1.0;
+    using namespace vf::lbm::dir;
+
+    real op = 1.0;
     return ((f[DIR_P00] + f[DIR_M00] + f[DIR_0P0] + f[DIR_0M0] + f[DIR_00P] + f[DIR_00M] +
              2. * (f[DIR_PP0] + f[DIR_MM0] + f[DIR_PM0] + f[DIR_MP0] + f[DIR_P0P] + f[DIR_M0M] + f[DIR_P0M] + f[DIR_M0P] + f[DIR_0PP] + f[DIR_0MM] + f[DIR_0PM] + f[DIR_0MP]) +
              3. * (f[DIR_PPP] + f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] + f[DIR_PPM] + f[DIR_MMM] + f[DIR_PMM] + f[DIR_MPM]) -
              (vx1 * vx1 + vx2 * vx2 + vx3 * vx3)) *
                 (1 - 0.5 * op) +
             op * 0.5 * (rho)) *
-           UbMath::c1o3;
+           vf::lbm::constant::c1o3;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
+static inline real getShearRate(const real *const f, real collFactorF)
 {
-    LBMReal mfcbb = f[DIR_P00];
-    LBMReal mfbcb = f[DIR_0P0];
-    LBMReal mfbbc = f[DIR_00P];
-    LBMReal mfccb = f[DIR_PP0];
-    LBMReal mfacb = f[DIR_MP0];
-    LBMReal mfcbc = f[DIR_P0P];
-    LBMReal mfabc = f[DIR_M0P];
-    LBMReal mfbcc = f[DIR_0PP];
-    LBMReal mfbac = f[DIR_0MP];
-    LBMReal mfccc = f[DIR_PPP];
-    LBMReal mfacc = f[DIR_MPP];
-    LBMReal mfcac = f[DIR_PMP];
-    LBMReal mfaac = f[DIR_MMP];
-
-    LBMReal mfabb = f[DIR_M00];
-    LBMReal mfbab = f[DIR_0M0];
-    LBMReal mfbba = f[DIR_00M];
-    LBMReal mfaab = f[DIR_MM0];
-    LBMReal mfcab = f[DIR_PM0];
-    LBMReal mfaba = f[DIR_M0M];
-    LBMReal mfcba = f[DIR_P0M];
-    LBMReal mfbaa = f[DIR_0MM];
-    LBMReal mfbca = f[DIR_0PM];
-    LBMReal mfaaa = f[DIR_MMM];
-    LBMReal mfcaa = f[DIR_PMM];
-    LBMReal mfaca = f[DIR_MPM];
-    LBMReal mfcca = f[DIR_PPM];
-
-    LBMReal mfbbb = f[DIR_000];
-
-    LBMReal m0, m1, m2;
-
-    LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) + (mfaab + mfacb + mfcab + mfccb) +
+    using namespace vf::lbm::dir;
+
+    real mfcbb = f[DIR_P00];
+    real mfbcb = f[DIR_0P0];
+    real mfbbc = f[DIR_00P];
+    real mfccb = f[DIR_PP0];
+    real mfacb = f[DIR_MP0];
+    real mfcbc = f[DIR_P0P];
+    real mfabc = f[DIR_M0P];
+    real mfbcc = f[DIR_0PP];
+    real mfbac = f[DIR_0MP];
+    real mfccc = f[DIR_PPP];
+    real mfacc = f[DIR_MPP];
+    real mfcac = f[DIR_PMP];
+    real mfaac = f[DIR_MMP];
+
+    real mfabb = f[DIR_M00];
+    real mfbab = f[DIR_0M0];
+    real mfbba = f[DIR_00M];
+    real mfaab = f[DIR_MM0];
+    real mfcab = f[DIR_PM0];
+    real mfaba = f[DIR_M0M];
+    real mfcba = f[DIR_P0M];
+    real mfbaa = f[DIR_0MM];
+    real mfbca = f[DIR_0PM];
+    real mfaaa = f[DIR_MMM];
+    real mfcaa = f[DIR_PMM];
+    real mfaca = f[DIR_MPM];
+    real mfcca = f[DIR_PPM];
+
+    real mfbbb = f[DIR_000];
+
+    real m0, m1, m2;
+
+    real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) + (mfaab + mfacb + mfcab + mfccb) +
                   (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) +
                   (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-    LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+    real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb));
-    LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+    real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
                    (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab));
-    LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+    real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
                    (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba));
 
-    LBMReal oMdrho;
+    real oMdrho;
 
     oMdrho = mfccc + mfaaa;
     m0     = mfaca + mfcac;
@@ -1090,9 +1140,9 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m0 += mfbbb; // hat gefehlt
     oMdrho = 1. - (oMdrho + m0);
 
-    LBMReal vx2;
-    LBMReal vy2;
-    LBMReal vz2;
+    real vx2;
+    real vy2;
+    real vz2;
     vx2 = vvx * vvx;
     vy2 = vvy * vvy;
     vz2 = vvz * vvz;
@@ -1106,7 +1156,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfaac - mfaaa;
     m0    = m2 + mfaab;
     mfaaa = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::lbm::constant::c1o36 * oMdrho;
     mfaab = m1 - m0 * vvz;
     mfaac = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1114,7 +1164,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfabc - mfaba;
     m0    = m2 + mfabb;
     mfaba = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::lbm::constant::c1o9 * oMdrho;
     mfabb = m1 - m0 * vvz;
     mfabc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1122,7 +1172,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfacc - mfaca;
     m0    = m2 + mfacb;
     mfaca = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::lbm::constant::c1o36 * oMdrho;
     mfacb = m1 - m0 * vvz;
     mfacc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1131,7 +1181,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbac - mfbaa;
     m0    = m2 + mfbab;
     mfbaa = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::lbm::constant::c1o9 * oMdrho;
     mfbab = m1 - m0 * vvz;
     mfbac = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1139,7 +1189,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbbc - mfbba;
     m0    = m2 + mfbbb;
     mfbba = m0;
-    m0 += UbMath::c4o9 * oMdrho;
+    m0 += vf::lbm::constant::c4o9 * oMdrho;
     mfbbb = m1 - m0 * vvz;
     mfbbc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1147,7 +1197,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbcc - mfbca;
     m0    = m2 + mfbcb;
     mfbca = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::lbm::constant::c1o9 * oMdrho;
     mfbcb = m1 - m0 * vvz;
     mfbcc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1156,7 +1206,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcac - mfcaa;
     m0    = m2 + mfcab;
     mfcaa = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::lbm::constant::c1o36 * oMdrho;
     mfcab = m1 - m0 * vvz;
     mfcac = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1164,7 +1214,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcbc - mfcba;
     m0    = m2 + mfcbb;
     mfcba = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::lbm::constant::c1o9 * oMdrho;
     mfcbb = m1 - m0 * vvz;
     mfcbc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1172,7 +1222,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfccc - mfcca;
     m0    = m2 + mfccb;
     mfcca = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::lbm::constant::c1o36 * oMdrho;
     mfccb = m1 - m0 * vvz;
     mfccc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1184,7 +1234,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfaca - mfaaa;
     m0    = m2 + mfaba;
     mfaaa = m0;
-    m0 += UbMath::c1o6 * oMdrho;
+    m0 += vf::lbm::constant::c1o6 * oMdrho;
     mfaba = m1 - m0 * vvy;
     mfaca = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1199,7 +1249,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfacc - mfaac;
     m0    = m2 + mfabc;
     mfaac = m0;
-    m0 += UbMath::c1o18 * oMdrho;
+    m0 += vf::lbm::constant::c1o18 * oMdrho;
     mfabc = m1 - m0 * vvy;
     mfacc = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1208,7 +1258,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbca - mfbaa;
     m0    = m2 + mfbba;
     mfbaa = m0;
-    m0 += UbMath::c2o3 * oMdrho;
+    m0 += vf::lbm::constant::c2o3 * oMdrho;
     mfbba = m1 - m0 * vvy;
     mfbca = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1223,7 +1273,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbcc - mfbac;
     m0    = m2 + mfbbc;
     mfbac = m0;
-    m0 += UbMath::c2o9 * oMdrho;
+    m0 += vf::lbm::constant::c2o9 * oMdrho;
     mfbbc = m1 - m0 * vvy;
     mfbcc = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1232,7 +1282,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcca - mfcaa;
     m0    = m2 + mfcba;
     mfcaa = m0;
-    m0 += UbMath::c1o6 * oMdrho;
+    m0 += vf::lbm::constant::c1o6 * oMdrho;
     mfcba = m1 - m0 * vvy;
     mfcca = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1247,7 +1297,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfccc - mfcac;
     m0    = m2 + mfcbc;
     mfcac = m0;
-    m0 += UbMath::c1o18 * oMdrho;
+    m0 += vf::lbm::constant::c1o18 * oMdrho;
     mfcbc = m1 - m0 * vvy;
     mfccc = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1274,7 +1324,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcca - mfaca;
     m0    = m2 + mfbca;
     mfaca = m0;
-    m0 += UbMath::c1o3 * oMdrho;
+    m0 += vf::lbm::constant::c1o3 * oMdrho;
     mfbca = m1 - m0 * vvx;
     mfcca = m2 - 2. * m1 * vvx + vx2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1305,7 +1355,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcac - mfaac;
     m0    = m2 + mfbac;
     mfaac = m0;
-    m0 += UbMath::c1o3 * oMdrho;
+    m0 += vf::lbm::constant::c1o3 * oMdrho;
     mfbac = m1 - m0 * vvx;
     mfcac = m2 - 2. * m1 * vvx + vx2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1320,36 +1370,38 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfccc - mfacc;
     m0    = m2 + mfbcc;
     mfacc = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::lbm::constant::c1o9 * oMdrho;
     mfbcc = m1 - m0 * vvx;
     mfccc = m2 - 2. * m1 * vvx + vx2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
     // Cumulants
     ////////////////////////////////////////////////////////////////////////////////////
-    LBMReal OxxPyyPzz = 1.; // omega2 or bulk viscosity
+    real OxxPyyPzz = 1.; // omega2 or bulk viscosity
 
-    LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-    LBMReal mxxMyy    = mfcaa - mfaca;
-    LBMReal mxxMzz    = mfcaa - mfaac;
+    real mxxPyyPzz = mfcaa + mfaca + mfaac;
+    real mxxMyy    = mfcaa - mfaca;
+    real mxxMzz    = mfcaa - mfaac;
 
-    LBMReal dxux = -UbMath::c1o2 * collFactorF * (mxxMyy + mxxMzz) + UbMath::c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-    LBMReal dyuy = dxux + collFactorF * UbMath::c3o2 * mxxMyy;
-    LBMReal dzuz = dxux + collFactorF * UbMath::c3o2 * mxxMzz;
+    real dxux = -vf::lbm::constant::c1o2 * collFactorF * (mxxMyy + mxxMzz) + vf::lbm::constant::c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+    real dyuy = dxux + collFactorF * vf::lbm::constant::c3o2 * mxxMyy;
+    real dzuz = dxux + collFactorF * vf::lbm::constant::c3o2 * mxxMzz;
 
-    LBMReal Dxy = -UbMath::three * collFactorF * mfbba;
-    LBMReal Dxz = -UbMath::three * collFactorF * mfbab;
-    LBMReal Dyz = -UbMath::three * collFactorF * mfabb;
+    real Dxy = -vf::lbm::constant::c3o1 * collFactorF * mfbba;
+    real Dxz = -vf::lbm::constant::c3o1 * collFactorF * mfbab;
+    real Dyz = -vf::lbm::constant::c3o1 * collFactorF * mfabb;
 
-    return sqrt(UbMath::c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) /
-           (rho + UbMath::one);
+    return sqrt(vf::lbm::constant::c2o1 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) /
+           (rho + vf::lbm::constant::c1o1);
 }
 //Multiphase stuff
 //////////////////////////////////////////////////////////////////////////
-static void calcMultiphaseFeq(LBMReal *const &feq /*[27]*/, const LBMReal &rho, const LBMReal &p1, const LBMReal &vx1,
-                              const LBMReal &vx2, const LBMReal &vx3)
+static void calcMultiphaseFeq(real *const &feq /*[27]*/, const real &rho, const real &p1, const real &vx1,
+                              const real &vx2, const real &vx3)
 {
-    using namespace UbMath;
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+
+    using namespace vf::lbm::constant;
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     feq[DIR_000] = c8o27 * (p1 + rho * c1o3 * (-cu_sq));
     feq[DIR_P00]    = c2o27 * (p1 + rho * c1o3 * (3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq));
@@ -1388,11 +1440,13 @@ static void calcMultiphaseFeq(LBMReal *const &feq /*[27]*/, const LBMReal &rho,
                (p1 + rho * c1o3 * (3.0 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcMultiphaseFeqVB(LBMReal *const &feq /*[27]*/, const LBMReal &p1, const LBMReal &vx1, const LBMReal &vx2,
-                                const LBMReal &vx3)
+static void calcMultiphaseFeqVB(real *const &feq /*[27]*/, const real &p1, const real &vx1, const real &vx2,
+                                const real &vx3)
 {
-    using namespace UbMath;
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+
+    using namespace vf::lbm::constant;
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     feq[DIR_000] = p1 + c8o27 * (-cu_sq);
     feq[DIR_P00]    = c2o27 * ((3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq));
@@ -1423,11 +1477,13 @@ static void calcMultiphaseFeqVB(LBMReal *const &feq /*[27]*/, const LBMReal &p1,
     feq[DIR_MPP]  = c1o216 * ((3.0 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcMultiphaseHeq(LBMReal *const &heq /*[27]*/, const LBMReal &phi, const LBMReal &vx1, const LBMReal &vx2,
-                              const LBMReal &vx3)
+static void calcMultiphaseHeq(real *const &heq /*[27]*/, const real &phi, const real &vx1, const real &vx2,
+                              const real &vx3)
 {
-    using namespace UbMath;
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+    using namespace vf::lbm::constant;
+
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     heq[DIR_000] = c8o27 * phi * (1.0 - cu_sq);
     heq[DIR_P00]    = c2o27 * phi * (1.0 + 3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq);
diff --git a/src/cpu/VirtualFluidsCore/LBM/ICell.h b/src/cpu/VirtualFluidsCore/LBM/ICell.h
index c080033b8bbdbe1741f6f624c2726ffa145bf080..e9b07b1eee9b5819c95203ceabe4af1afd562e7d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ICell.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ICell.h
@@ -7,14 +7,14 @@
 struct ICell3D {
     ICell3D(int size);
 
-    std::vector<LBMReal> TSW;
-    std::vector<LBMReal> TNW;
-    std::vector<LBMReal> TNE;
-    std::vector<LBMReal> TSE;
-    std::vector<LBMReal> BSW;
-    std::vector<LBMReal> BNW;
-    std::vector<LBMReal> BNE;
-    std::vector<LBMReal> BSE;
+    std::vector<real> TSW; // NOTE(review): eight per-cell buffers; names presumably encode Top/Bottom, North/South, West/East - confirm
+    std::vector<real> TNW;
+    std::vector<real> TNE;
+    std::vector<real> TSE;
+    std::vector<real> BSW;
+    std::vector<real> BNW;
+    std::vector<real> BNE;
+    std::vector<real> BSE;
 };
 
 inline ICell3D::ICell3D(int size)
diff --git a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
index bde61d9d314b61327ff8f8a2a71d2864d50cc7f5..ab047a86081f9afc1a099eea0087f2728bfa519d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
@@ -48,18 +48,18 @@ public:
     virtual ~ILBMKernel() = default;
 
     virtual void calculate(int step)    = 0;
-    virtual double getCalculationTime() = 0;
+    virtual real getCalculationTime() = 0;
     virtual void swapDistributions()    = 0;
 
     virtual bool getCompressible() const                                             = 0;
     virtual SPtr<BCProcessor> getBCProcessor() const                                 = 0;
     virtual void setBCProcessor(SPtr<BCProcessor> bcProcessor)                       = 0;
     virtual SPtr<DataSet3D> getDataSet() const                                       = 0;
-    virtual double getCollisionFactor() const                                        = 0;
-    virtual void setCollisionFactor(double collFactor)                               = 0;
+    virtual real getCollisionFactor() const                                        = 0;
+    virtual void setCollisionFactor(real collFactor)                               = 0;
     virtual bool isInsideOfDomain(const int &x1, const int &x2, const int &x3) const = 0;
     virtual int getGhostLayerWidth() const                                           = 0;
-    virtual LBMReal getDeltaT() const                                                = 0;
+    virtual real getDeltaT() const                                                = 0;
     virtual bool getWithForcing() const                                              = 0;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp
index ec4b9bbd4f177a3d0fdbd0c3f1d4c3d7775fface..782d5c96856b1c623d17a453f9552d10ab0a04ad 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+// Numeric constants formerly taken from UbMath now come from vf::lbm::constant.
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 IncompressibleCumulantLBMKernel::IncompressibleCumulantLBMKernel()
@@ -135,49 +136,49 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                // a b c
                //-1 0 1
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-               LBMReal m0, m1, m2;
-
-               LBMReal rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+               real m0, m1, m2;
+
+               real rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
 
-               LBMReal vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
+               real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
                   (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
                   (mfcbb-mfabb));
-               LBMReal vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
+               real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
                   (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
                   (mfbcb-mfbab));
-               LBMReal vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
+               real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
                   (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
                   (mfbbc-mfbba));
 
@@ -185,9 +186,9 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////
                if (withForcing)
                {
-                  muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                  muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                  muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                  muX1 = static_cast<real>(x1-1+ix1*maxX1);
+                  muX2 = static_cast<real>(x2-1+ix2*maxX2);
+                  muX3 = static_cast<real>(x3-1+ix3*maxX3);
 
                   forcingX1 = muForcingX1.Eval();
                   forcingX2 = muForcingX2.Eval();
@@ -198,7 +199,7 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                   vvz += forcingX3*deltaT*0.5; // Z
                }
                ///////////////////////////////////////////////////////////////////////////////////////////               
-               LBMReal oMdrho;
+               real oMdrho;
 
                oMdrho=mfccc+mfaaa;
                m0=mfaca+mfcac;
@@ -226,15 +227,15 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                m0+=mfbbb; //hat gefehlt
                oMdrho = 1. - (oMdrho + m0);
 
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2=vvx*vvx;
                vy2=vvy*vvy;
                vz2=vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimit = 0.01;
+               real wadjust;
+               real qudricLimit = 0.01;
                ////////////////////////////////////////////////////////////////////////////////////
                //Hin
                ////////////////////////////////////////////////////////////////////////////////////
@@ -465,33 +466,33 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Cumulants
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-               LBMReal OxyyPxzz  = 1.;//-s9;//2+s9;//
-               //LBMReal OxyyMxzz  = 1.;//2+s9;//
-               LBMReal O4        = 1.;
-               LBMReal O5        = 1.;
-               LBMReal O6        = 1.;
+               real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+               real OxyyPxzz  = 1.;//-s9;//2+s9;//
+               //real OxyyMxzz  = 1.;//2+s9;//
+               real O4        = 1.;
+               real O5        = 1.;
+               real O6        = 1.;
 
                //Cum 4.
                //LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
                //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
                //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-               LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-               LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-               LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+               real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+               real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+               real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-               LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
-               LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
-               LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
+               real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
+               real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
+               real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
 
                //Cum 5.
-               LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-               LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-               LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+               real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+               real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+               real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
                //Cum 6.
-               LBMReal CUMccc = mfccc  +((-4. *  mfbbb * mfbbb
+               real CUMccc = mfccc  +((-4. *  mfbbb * mfbbb
                   -       (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
                   -  4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
                   -  2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -505,13 +506,13 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
 
                //2.
                // linear combinations
-               LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-               LBMReal mxxMyy    = mfcaa - mfaca;
-               LBMReal mxxMzz         = mfcaa - mfaac;
+               real mxxPyyPzz = mfcaa + mfaca + mfaac;
+               real mxxMyy    = mfcaa - mfaca;
+               real mxxMzz         = mfcaa - mfaac;
 
-               LBMReal dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-               LBMReal dyuy = dxux + collFactor * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux + collFactor * c3o2 * mxxMzz;
+               real dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+               real dyuy = dxux + collFactor * c3o2 * mxxMyy;
+               real dzuz = dxux + collFactor * c3o2 * mxxMzz;
 
                //relax
                mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -529,14 +530,14 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
 
                //3.
                // linear combinations
-               LBMReal mxxyPyzz = mfcba + mfabc;
-               LBMReal mxxyMyzz = mfcba - mfabc;
+               real mxxyPyzz = mfcba + mfabc;
+               real mxxyMyzz = mfcba - mfabc;
 
-               LBMReal mxxzPyyz = mfcab + mfacb;
-               LBMReal mxxzMyyz = mfcab - mfacb;
+               real mxxzPyyz = mfcab + mfacb;
+               real mxxzMyyz = mfcab - mfacb;
 
-               LBMReal mxyyPxzz = mfbca + mfbac;
-               LBMReal mxyyMxzz = mfbca - mfbac;
+               real mxyyPxzz = mfbca + mfbac;
+               real mxyyMxzz = mfbca - mfbac;
 
                //relax
                wadjust    = OxyyMxzz+(1.-OxyyMxzz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimit);
@@ -831,11 +832,11 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
-               //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = rho - rho_post;
+               //real dif = fabs(rho - rho_post);
+               real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
                if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -889,7 +890,7 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double IncompressibleCumulantLBMKernel::getCalculationTime()
+real IncompressibleCumulantLBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h
index d7c3c78a6ffb2c27b99fbf603f5561dff0171c29..5abe9afc29caf6fe178d6b0e7c3b44e373c6defb 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h
@@ -23,27 +23,27 @@ public:
    ~IncompressibleCumulantLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    void setRelaxationParameter(Parameter p);
 protected:
    virtual void initDataSet();
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   LBMReal OxyyMxzz;
+   real OxyyMxzz;
    Parameter parameter;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp
index 90bc1998454e980c86054934222b251699f1412a..ed77717f6573932d8a247863dc69bfafc462b555 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+// Numeric constants formerly taken from UbMath now come from vf::lbm::constant.
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 IncompressibleCumulantWithSpongeLayerLBMKernel::IncompressibleCumulantWithSpongeLayerLBMKernel()
@@ -23,7 +24,7 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::initDataSet()
    dataSet->setFdistributions(d);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleCumulantWithSpongeLayerLBMKernel::setRelaxFactorParam(int vdir, double vL1, double vdx, double vSP)
+void IncompressibleCumulantWithSpongeLayerLBMKernel::setRelaxFactorParam(int vdir, real vL1, real vdx, real vSP)
 {
    direction = vdir;
    L1 = vL1;
@@ -31,18 +32,20 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::setRelaxFactorParam(int vdi
    SP = vSP;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, double vL1, double vdx, double vSP)
+void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, real vL1, real vdx, real vSP)
 {
+    using namespace vf::lbm::dir;
+
    direction = vdir;
    L1 = vL1;
    dx = vdx;
    SP = vSP;
 
-   double sizeX = L1 / dx;
-   double sizeSP = SP / dx;
-   double muX1, muX2, muX3;
+   real sizeX = L1 / dx;
+   real sizeSP = SP / dx;
+   real muX1, muX2, muX3;
 
-   LBMReal spongeFactor;
+   real spongeFactor;
 
    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -57,7 +60,7 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, d
    int maxX2 = bcArrayMaxX2 - ghostLayerWidth - 1;
    int maxX3 = bcArrayMaxX3 - ghostLayerWidth - 1;
 
-   SPtr<RelaxationFactorArray3D> relaxationFactorPtr = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(maxX1, maxX2, maxX3));
+   SPtr<RelaxationFactorArray3D> relaxationFactorPtr = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(maxX1, maxX2, maxX3));
    dataSet->setRelaxationFactor(relaxationFactorPtr);
 
    for (int x3 = minX3; x3 < maxX3; x3++)
@@ -68,38 +71,38 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, d
          {
             switch (direction)
             {
-            case D3Q27System::DIR_P00:
-               muX1 = (double)(x1 + ix1 * maxX1);
+            case DIR_P00:
+               muX1 = (real)(x1 + ix1 * maxX1);
                if (muX1 >= (sizeX - sizeSP) / deltaT)
                   spongeFactor = (sizeX - (muX1 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_M00:
-               muX1 = (double)(x1 + ix1 * maxX1);
+            case DIR_M00:
+               muX1 = (real)(x1 + ix1 * maxX1);
                if (muX1 <= sizeSP / deltaT)
                   spongeFactor = (sizeSP - (muX1 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_0P0:
-               muX2 = (double)(x2 + ix2 * maxX2);
+            case DIR_0P0:
+               muX2 = (real)(x2 + ix2 * maxX2);
                if (muX2 >= (sizeX - sizeSP) / deltaT)
                   spongeFactor = (sizeX - (muX2 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_0M0:
-               muX2 = (double)(x2 + ix2 * maxX2);
+            case DIR_0M0:
+               muX2 = (real)(x2 + ix2 * maxX2);
                if (muX2 <= sizeSP / deltaT)
                   spongeFactor = (sizeSP - (muX2 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_00P:
-               muX3 = (double)(x3 + ix3 * maxX3);
+            case DIR_00P:
+               muX3 = (real)(x3 + ix3 * maxX3);
                if (muX3 >= (sizeX - sizeSP) / deltaT)
                   spongeFactor = (sizeX - (muX3 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_00M:
-               muX3 = (double)(x3 + ix3 * maxX3);
+            case DIR_00M:
+               muX3 = (real)(x3 + ix3 * maxX3);
                if (muX3 <= sizeSP / deltaT)
                   spongeFactor = (sizeSP - (muX3 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
@@ -204,8 +207,8 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
    int maxX2 = bcArrayMaxX2-ghostLayerWidth-1;
    int maxX3 = bcArrayMaxX3-ghostLayerWidth-1;
 
-   LBMReal collFactor0 = collFactor;
-   LBMReal spongeFactor;
+   real collFactor0 = collFactor;
+   real spongeFactor;
 
    for(int x3 = minX3; x3 <= maxX3; x3++)
    {
@@ -231,49 +234,49 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
 
                //Rest ist b
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1,x2,x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N,x1,x2,x3); 
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T,x1,x2,x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE,x1,x2,x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE,x1,x2,x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p,x2,x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN,x1,x2,x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS,x1,x2p,x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE,x1,x2,x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE,x1,x2p,x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,x3  );
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,x2p,x3  );
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,x2,x3p  );
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3 );
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,x2p,x3 );
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,x3p );
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,x2,x3p );
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,x2p,x3p );
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,x2,x3p );
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,x2p,x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,x2,x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1,x2,x3);
-
-               LBMReal m0, m1, m2;
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1,x2,x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N,x1,x2,x3); 
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T,x1,x2,x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE,x1,x2,x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE,x1,x2,x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p,x2,x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN,x1,x2,x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS,x1,x2p,x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE,x1,x2,x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE,x1,x2p,x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,x3  );
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,x2p,x3  );
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,x2,x3p  );
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3 );
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,x2p,x3 );
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,x3p );
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,x2,x3p );
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,x2p,x3p );
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,x2,x3p );
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,x2p,x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,x2,x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1,x2,x3);
+
+               real m0, m1, m2;
                
-               LBMReal rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
 
-               LBMReal vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
+               real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
                   (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
                   (mfcbb-mfabb));
-               LBMReal vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
+               real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
                   (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
                   (mfbcb-mfbab));
-               LBMReal vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
+               real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
                   (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
                   (mfbbc-mfbba));
                //////////////////////////////////////////////////////////////////////////
@@ -323,7 +326,7 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                //}
                //////////////////////////////////////////////////////////////////////////
 
-               LBMReal oMdrho;
+               real oMdrho;
 
                oMdrho=mfccc+mfaaa;
                m0=mfaca+mfcac;
@@ -351,15 +354,15 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                m0+=mfbbb; //hat gefehlt
                oMdrho = 1. - (oMdrho + m0);
 
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2=vvx*vvx;
                vy2=vvy*vvy;
                vz2=vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimit = 0.01;
+               real wadjust;
+               real qudricLimit = 0.01;
                ////////////////////////////////////////////////////////////////////////////////////
                //Hin
                ////////////////////////////////////////////////////////////////////////////////////
@@ -590,29 +593,29 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Cumulants
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal OxxPyyPzz = 1.;
-               LBMReal OxyyPxzz  = 1.;//-s9;//2+s9;//
-               //LBMReal OxyyMxzz  = 1.;//2+s9;//
-               LBMReal O4        = 1.;
-               LBMReal O5        = 1.;
-               LBMReal O6        = 1.;
+               real OxxPyyPzz = 1.;
+               real OxyyPxzz  = 1.;//-s9;//2+s9;//
+               //real OxyyMxzz  = 1.;//2+s9;//
+               real O4        = 1.;
+               real O5        = 1.;
+               real O6        = 1.;
 
                //Cum 4.
-               LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab);
-               LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb);
-               LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb);
+               real CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab);
+               real CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb);
+               real CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb);
 
-               LBMReal CUMcca = mfcca - (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
-               LBMReal CUMcac = mfcac - (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
-               LBMReal CUMacc = mfacc - (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
+               real CUMcca = mfcca - (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
+               real CUMcac = mfcac - (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
+               real CUMacc = mfacc - (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
 
                //Cum 5.
-               LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-               LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-               LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+               real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+               real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+               real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
                //Cum 6.
-               LBMReal CUMccc = mfccc  +((-4. *  mfbbb * mfbbb 
+               real CUMccc = mfccc  +((-4. *  mfbbb * mfbbb 
                   -       (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
                   -  4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
                   -  2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -626,13 +629,13 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
 
                //2.
                // linear combinations
-               LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-               LBMReal mxxMyy    = mfcaa - mfaca;
-               LBMReal mxxMzz         = mfcaa - mfaac;
+               real mxxPyyPzz = mfcaa + mfaca + mfaac;
+               real mxxMyy    = mfcaa - mfaca;
+               real mxxMzz         = mfcaa - mfaac;
 
-               LBMReal dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-               LBMReal dyuy = dxux + collFactor * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux + collFactor * c3o2 * mxxMzz;
+               real dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+               real dyuy = dxux + collFactor * c3o2 * mxxMyy;
+               real dzuz = dxux + collFactor * c3o2 * mxxMzz;
 
                //relax
                mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -650,14 +653,14 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
 
                //3.
                // linear combinations
-               LBMReal mxxyPyzz = mfcba + mfabc;
-               LBMReal mxxyMyzz = mfcba - mfabc;
+               real mxxyPyzz = mfcba + mfabc;
+               real mxxyMyzz = mfcba - mfabc;
 
-               LBMReal mxxzPyyz = mfcab + mfacb;
-               LBMReal mxxzMyyz = mfcab - mfacb;
+               real mxxzPyyz = mfcab + mfacb;
+               real mxxzMyyz = mfcab - mfacb;
 
-               LBMReal mxyyPxzz = mfbca + mfbac;
-               LBMReal mxyyMxzz = mfbca - mfbac;
+               real mxyyPxzz = mfbca + mfbac;
+               real mxyyMxzz = mfbca - mfbac;
 
                //relax
                wadjust    = OxyyMxzz+(1.-OxyyMxzz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimit);
@@ -948,11 +951,11 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb; 
                //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = rho - rho_post;
+               real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
                if(dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h
index 2747cdc7673b6fb7aa3ade08162568f14c3e3ad1..760ad30fb5abb51b2f7d21dbc23d26b3124ac934 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h
@@ -24,19 +24,19 @@ public:
    ~IncompressibleCumulantWithSpongeLayerLBMKernel() override;
    SPtr<LBMKernel> clone() override;
    void calculate(int step) override;
-   void initRelaxFactor(int vdir, double vL1, double vdx, double vSP);
+   void initRelaxFactor(int vdir, real vL1, real vdx, real vSP);
    //! \param vdir where the sponge layer is placed
    //! \param vL1 length of simulation domain
    //! \param vdx subgrid space 
    //! \param vSP length of sponge layer
-   void setRelaxFactorParam(int vdir, double vL1, double vdx, double vSP);
+   void setRelaxFactorParam(int vdir, real vL1, real vdx, real vSP);
 protected:
   void initDataSet() override;
-  LBMReal OxyyMxzz;
+  real OxyyMxzz;
   int direction;
-  double L1;
-  double dx;
-  double SP;
+  real L1;
+  real dx;
+  real SP;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp
index 39b83f72a835ade4f903910a502383c6e3cd2323..c4759d786367fc9c5030898839b57cbec7bd48ec 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp
@@ -4,7 +4,7 @@
 
 
 //////////////////////////////////////////////////////////////////////////
-IncompressibleOffsetInterpolationProcessor::IncompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+IncompressibleOffsetInterpolationProcessor::IncompressibleOffsetInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
 
@@ -19,13 +19,13 @@ InterpolationProcessorPtr IncompressibleOffsetInterpolationProcessor::clone()
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void IncompressibleOffsetInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void IncompressibleOffsetInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -35,7 +35,7 @@ void IncompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMRea
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -49,22 +49,23 @@ void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27IC
    calcInterpolatedNode(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void IncompressibleOffsetInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    //UBLOG(logINFO,"D3Q27System::DIR_M0M  = " << D3Q27System::DIR_M0M);
-   //UBLOG(logINFO,"BW  = " << BW);
+   //UBLOG(logINFO,"BW  = " << BW);;
 
-   LBMReal rho = 0.0;
+   real rho = 0.0;
    D3Q27System::calcIncompMacroscopicValues(f,rho,vx1,vx2,vx3);
    
    //////////////////////////////////////////////////////////////////////////
@@ -85,7 +86,7 @@ void IncompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* cons
    kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))-(vx1*vx2));// might not be optimal MG 25.2.13
    kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))-(vx2*vx3));
    kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[D3Q27System::DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
    kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))-(vx1*vx1-vx3*vx3));
    //kxxMzz = -3./2.*omega*(((((f[NW]+f[SE])-(f[BS]+f[TN]))+((f[SW]+f[NE])-(f[17]+f[BN])))+((f[W]+f[DIR_P00])-(f[B]+f[T])))-(vx1*vx1-vx3*vx3));
 
@@ -100,25 +101,25 @@ void IncompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* cons
    //UBLOG(logINFO,"f[TN]  = " << f[TN] << " TN  = " << TN);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -418,7 +419,7 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(con
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -526,21 +527,22 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(con
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNode(LBMReal* f, LBMReal  /*omega*/, LBMReal  /*x*/, LBMReal  /*y*/, LBMReal  /*z*/, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNode(real* f, real  /*omega*/, real  /*x*/, real  /*y*/, real  /*z*/, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+   real rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingF*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
    f[DIR_P00]    = f_E    + xs*x_E    + ys*y_E    + zs*z_E    + xs*ys*xy_E    + xs*zs*xz_E    + ys*zs*yz_E    + feq[DIR_P00];
@@ -573,7 +575,7 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNode(LBMReal* f
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSW()
+real IncompressibleOffsetInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -586,7 +588,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSW()
+real IncompressibleOffsetInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -599,7 +601,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSE()
+real IncompressibleOffsetInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -612,7 +614,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSE()
+real IncompressibleOffsetInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -625,7 +627,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNW()
+real IncompressibleOffsetInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -638,7 +640,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNW()
+real IncompressibleOffsetInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -651,7 +653,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNE()
+real IncompressibleOffsetInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -664,7 +666,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNE()
+real IncompressibleOffsetInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -677,11 +679,12 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -689,22 +692,22 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal*
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
-   LBMReal rho = press ;//+ (ax+by+cz)/3.;
+   real rho = press ;//+ (ax+by+cz)/3.;
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingC*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
-   LBMReal eps_new = 2.;
-   LBMReal o  = omega;
+   real eps_new = 2.;
+   real o  = omega;
 //   LBMReal op = 1.;
 
    //f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
@@ -766,14 +769,14 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal*
    f[DIR_000] = f_ZERO + feq[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h
index 6b024d419308e284eae4f334290b23dcd5b48218..866c0f6933e67d66b3b36d65a0f484ba2d8cbf86 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h
@@ -16,51 +16,51 @@ class IncompressibleOffsetInterpolationProcessor : public InterpolationProcessor
 {
 public:
    IncompressibleOffsetInterpolationProcessor() = default;
-   IncompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   IncompressibleOffsetInterpolationProcessor(real omegaC, real omegaF);
    ~IncompressibleOffsetInterpolationProcessor() override = default;
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
-   //LBMReal forcingC, forcingF;
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
+   //real forcingC, forcingF;
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
 //   LBMReal a,b,c;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNode(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNode(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -69,7 +69,7 @@ inline void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp
index c37571337e537c324b557ac6c76680a63fc89b00..6c1c550fdce46eb91b7e33bedf5854c2d0ffe7b5 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp
@@ -3,8 +3,10 @@
 #include "BCProcessor.h"
 #include "DataSet3D.h"
 #include "BCArray3D.h"
+#include "lbm/constants/NumericConstants.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 InitDensityLBMKernel::InitDensityLBMKernel()
 {
@@ -39,14 +41,14 @@ SPtr<LBMKernel> InitDensityLBMKernel::clone()
    return kernel;
 }
 
-void InitDensityLBMKernel::setVelocity(int x1, int x2, int x3, LBMReal vvx, LBMReal vvy, LBMReal vvz)
+void InitDensityLBMKernel::setVelocity(int x1, int x2, int x3, real vvx, real vvy, real vvz)
 {
    v(0, x1, x2, x3) = vvx;
    v(1, x1, x2, x3) = vvy;
    v(2, x1, x2, x3) = vvz;
 }
 
-double InitDensityLBMKernel::getCalculationTime()
+real InitDensityLBMKernel::getCalculationTime()
 {
    return 0;
 }
@@ -856,6 +858,7 @@ double InitDensityLBMKernel::getCalculationTime()
 void InitDensityLBMKernel::calculate(int  /*step*/)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    localDistributions = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
    nonLocalDistributions = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getNonLocalDistributions();
@@ -863,9 +866,9 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
 
    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
    SPtr<BoundaryConditions> bcPtr;
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal drho, vx1, vx2, vx3;
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
+   real drho, vx1, vx2, vx3;
    const int bcArrayMaxX1 = (int)bcArray->getNX1();
    const int bcArrayMaxX2 = (int)bcArray->getNX2();
    const int bcArrayMaxX3 = (int)bcArray->getNX3();
@@ -954,7 +957,7 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
                //vx2 = vx2+(vvy-vx2);
                //vx3 = vx3+(vvz-vx3);
 
-               LBMReal cu_sq = 1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
+               real cu_sq = 1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
 
                feq[DIR_000] = c8o27*(drho-cu_sq);
                feq[DIR_P00] = c2o27*(drho+3.0*(vx1)+c9o2*(vx1)*(vx1)-cu_sq);
@@ -1016,11 +1019,11 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
 
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = f[REST]+f[DIR_P00]+f[W]+f[N]+f[S]+f[T]+f[B]
+               real rho_post = f[REST]+f[DIR_P00]+f[W]+f[N]+f[S]+f[T]+f[B]
                   +f[NE]+f[SW]+f[SE]+f[NW]+f[TE]+f[BW]+f[BE]
                   +f[TW]+f[TN]+f[BS]+f[BN]+f[TS]+f[TNE]+f[TSW]
                   +f[TSE]+f[TNW]+f[BNE]+f[BSW]+f[BSE]+f[BNW];
-               LBMReal dif = drho-rho_post;
+               real dif = drho-rho_post;
 #ifdef SINGLEPRECISION
                if (dif>10.0E-7||dif<-10.0E-7)
 #else
@@ -1033,35 +1036,35 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
                //////////////////////////////////////////////////////////////////////////
                //write distribution
                //////////////////////////////////////////////////////////////////////////
-               (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[D3Q27System::INV_P00];
-               (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[D3Q27System::INV_0P0];
-               (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[D3Q27System::INV_00P];
-               (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[D3Q27System::INV_PP0];
-               (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3) = f[D3Q27System::INV_MP0];
-               (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[D3Q27System::INV_P0P];
-               (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3) = f[D3Q27System::INV_M0P];
-               (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[D3Q27System::INV_0PP];
-               (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3) = f[D3Q27System::INV_0MP];
-               (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[D3Q27System::INV_PPP];
-               (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3) = f[D3Q27System::INV_MPP];
-               (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3) = f[D3Q27System::INV_PMP];
-               (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[D3Q27System::INV_MMP];
+               (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[INV_P00];
+               (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[INV_0P0];
+               (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[INV_00P];
+               (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[INV_PP0];
+               (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3) = f[INV_MP0];
+               (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[INV_P0P];
+               (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3) = f[INV_M0P];
+               (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[INV_0PP];
+               (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3) = f[INV_0MP];
+               (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[INV_PPP];
+               (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3) = f[INV_MPP];
+               (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3) = f[INV_PMP];
+               (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[INV_MMP];
 
-               (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) = f[D3Q27System::INV_M00];
-               (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) = f[D3Q27System::INV_0M0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) = f[D3Q27System::INV_00M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = f[D3Q27System::INV_MM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = f[D3Q27System::INV_PM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = f[D3Q27System::INV_M0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = f[D3Q27System::INV_P0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = f[D3Q27System::INV_0MM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = f[D3Q27System::INV_0PM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[D3Q27System::INV_MMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = f[D3Q27System::INV_PMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = f[D3Q27System::INV_MPM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = f[D3Q27System::INV_PPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) = f[INV_M00];
+               (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) = f[INV_0M0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) = f[INV_00M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = f[INV_MM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = f[INV_PM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = f[INV_M0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = f[INV_P0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = f[INV_0MM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = f[INV_0PM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[INV_MMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = f[INV_PMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = f[INV_MPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = f[INV_PPM];
 
-               (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+               (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
                //////////////////////////////////////////////////////////////////////////
 
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h
index 71833e246353ff667ff025234b4a137fb905c5be..33255f8f5517e6a030cdb060d8397a6cf6cd8580 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h
@@ -14,17 +14,17 @@ public:
    ~InitDensityLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void setVelocity(int x1, int x2, int x3, LBMReal vvx, LBMReal vvy, LBMReal vvz);
-   double getCalculationTime() override;
+   void setVelocity(int x1, int x2, int x3, real vvx, real vvy, real vvz);
+   real getCalculationTime() override;
 protected:
    void initDataSet();
 private:
-//   LBMReal f[D3Q27System::ENDF+1];
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+//   real f[D3Q27System::ENDF+1];
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 //   LBMReal OxyyMxzz;
-   CbArray4D<LBMReal, IndexerX4X3X2X1> v;
+   CbArray4D<real, IndexerX4X3X2X1> v;
 };
 
 #endif // InitDensityLBMKernel_h__
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp
index efe2c8e7cfb39b960c0c86405a05633816fe56ef..33bf1e623ce943d4edf3b11c3f51ad585adf4262 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp
@@ -4,26 +4,26 @@ InterpolationHelper::InterpolationHelper(InterpolationProcessorPtr iProcessor) :
 //////////////////////////////////////////////////////////////////////////
 InterpolationHelper::~InterpolationHelper() = default;
 //////////////////////////////////////////////////////////////////////////
-void InterpolationHelper::interpolate8to1(D3Q27ICell &icellF, LBMReal *icellC, double /*x1*/, double /*x2*/,
-                                          double /*x3*/, LBMReal omega)
+void InterpolationHelper::interpolate8to1(D3Q27ICell &icellF, real *icellC, real /*x1*/, real /*x2*/,
+                                          real /*x3*/, real omega)
 {
     iProcessor->calcInterpolatedCoefficiets(icellF, omega, 1.0);
     iProcessor->calcInterpolatedNodeFC(icellC, omega);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationHelper::interpolate8to1WithVelocity(D3Q27ICell &icellF, double x1, double x2, double x3,
-                                                      LBMReal omega, LBMReal &vx1, LBMReal &vx2, LBMReal &vx3)
+void InterpolationHelper::interpolate8to1WithVelocity(D3Q27ICell &icellF, real x1, real x2, real x3,
+                                                      real omega, real &vx1, real &vx2, real &vx3)
 {
     iProcessor->setOffsets(0.0, 0.0, 0.0);
     iProcessor->calcInterpolatedCoefficiets(icellF, omega, 0.0);
     iProcessor->calcInterpolatedVelocity(x1, x2, x3, vx1, vx2, vx3);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationHelper::interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, double x1, double x2,
-                                                                     double x3, LBMReal omega, LBMReal &vx1,
-                                                                     LBMReal &vx2, LBMReal &vx3, LBMReal &tauxx,
-                                                                     LBMReal &tauyy, LBMReal &tauzz, LBMReal &tauxy,
-                                                                     LBMReal &tauxz, LBMReal &tauyz)
+void InterpolationHelper::interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, real x1, real x2,
+                                                                     real x3, real omega, real &vx1,
+                                                                     real &vx2, real &vx3, real &tauxx,
+                                                                     real &tauyy, real &tauzz, real &tauxy,
+                                                                     real &tauxz, real &tauyz)
 {
     iProcessor->setOffsets(0.0, 0.0, 0.0);
     iProcessor->calcInterpolatedCoefficiets(icellF, omega, 0.0);
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h
index 0e7318a8c44785679cdad1292bf561cc631b2041..b67e8d18ac5c54c775c098aad484d4b5657a917b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h
@@ -11,13 +11,13 @@ class InterpolationHelper
 public:
     InterpolationHelper(InterpolationProcessorPtr iProcessor);
     ~InterpolationHelper();
-    void interpolate8to1(D3Q27ICell &icellF, LBMReal *icellC, double x1, double x2, double x3, LBMReal omega);
-    void interpolate8to1WithVelocity(D3Q27ICell &icellF, double x1, double x2, double x3, LBMReal omega, LBMReal &vx1,
-                                     LBMReal &vx2, LBMReal &vx3);
-    void interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, double x1, double x2, double x3, LBMReal omega,
-                                                    LBMReal &vx1, LBMReal &vx2, LBMReal &vx3, LBMReal &tauxx,
-                                                    LBMReal &tauyy, LBMReal &tauzz, LBMReal &tauxy, LBMReal &tauxz,
-                                                    LBMReal &tauyz);
+    void interpolate8to1(D3Q27ICell &icellF, real *icellC, real x1, real x2, real x3, real omega);
+    void interpolate8to1WithVelocity(D3Q27ICell &icellF, real x1, real x2, real x3, real omega, real &vx1,
+                                     real &vx2, real &vx3);
+    void interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, real x1, real x2, real x3, real omega,
+                                                    real &vx1, real &vx2, real &vx3, real &tauxx,
+                                                    real &tauyy, real &tauzz, real &tauxy, real &tauxz,
+                                                    real &tauyz);
 
 protected:
 private:
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp
index 8d2a4163b3127d5199c0419e34e1c4b28d505e2c..a82f397c9f89d7605d8409b8f32f8d87a8f8a402 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp
@@ -41,12 +41,12 @@ void InterpolationProcessor::writeICellInv(SPtr<DistributionArray3D> f, const D3
     f->setDistributionInv(icell.TNE, x1 + 1, x2 + 1, x3 + 1);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationProcessor::writeINode(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2, int x3)
+void InterpolationProcessor::writeINode(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2, int x3)
 {
     f->setDistribution(inode, x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationProcessor::writeINodeInv(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2,
+void InterpolationProcessor::writeINodeInv(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2,
                                            int x3)
 {
     f->setDistributionInv(inode, x1, x2, x3);
@@ -65,7 +65,7 @@ bool InterpolationProcessor::iCellHasSolid(const SPtr<BCArray3D> bcArray, int x1
 //////////////////////////////////////////////////////////////////////////
 bool InterpolationProcessor::findNeighborICell(const SPtr<BCArray3D> bcArray, SPtr<DistributionArray3D> f,
                                                D3Q27ICell &icell, int maxX1, int maxX2, int maxX3, int x1, int x2,
-                                               int x3, LBMReal &xoff, LBMReal &yoff, LBMReal &zoff)
+                                               int x3, real &xoff, real &yoff, real &zoff)
 {
     m_maxX1 = maxX1;
     m_maxX2 = maxX2;
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h
index 627549dc991f31b543ca23e4c87e8520feb84af3..f298a531b61ca4c4d9ddffc6e2dfeab535be0aa1 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h
@@ -7,14 +7,14 @@
 #include "LBMSystem.h"
 
 struct D3Q27ICell {
-    LBMReal TSW[27];
-    LBMReal TNW[27];
-    LBMReal TNE[27];
-    LBMReal TSE[27];
-    LBMReal BSW[27];
-    LBMReal BNW[27];
-    LBMReal BNE[27];
-    LBMReal BSE[27];
+    real TSW[27];
+    real TNW[27];
+    real TNE[27];
+    real TSE[27];
+    real BSW[27];
+    real BNW[27];
+    real BNE[27];
+    real BSE[27];
 };
 
 class InterpolationProcessor;
@@ -28,34 +28,34 @@ public:
     InterpolationProcessor();
     virtual ~InterpolationProcessor();
     virtual InterpolationProcessorPtr clone()                                    = 0;
-    virtual void setOmegas(LBMReal omegaC, LBMReal omegaF)                       = 0;
+    virtual void setOmegas(real omegaC, real omegaF)                       = 0;
     virtual void interpolateCoarseToFine(D3Q27ICell &icellC, D3Q27ICell &icellF) = 0;
-    virtual void interpolateCoarseToFine(D3Q27ICell &icellC, D3Q27ICell &icellF, LBMReal xoff, LBMReal yoff,
-                                         LBMReal zoff)                           = 0;
-    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, LBMReal *icellC)    = 0;
-    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, LBMReal *icellC, LBMReal xoff, LBMReal yoff,
-                                         LBMReal zoff)                           = 0;
+    virtual void interpolateCoarseToFine(D3Q27ICell &icellC, D3Q27ICell &icellF, real xoff, real yoff,
+                                         real zoff)                           = 0;
+    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, real *icellC)    = 0;
+    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, real *icellC, real xoff, real yoff,
+                                         real zoff)                           = 0;
 
     static void readICell(SPtr<DistributionArray3D> f, D3Q27ICell &icell, int x1, int x2, int x3);
     static void writeICell(SPtr<DistributionArray3D> f, const D3Q27ICell &icell, int x1, int x2, int x3);
     static void writeICellInv(SPtr<DistributionArray3D> f, const D3Q27ICell &icell, int x1, int x2, int x3);
-    static void writeINode(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2, int x3);
-    static void writeINodeInv(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2, int x3);
+    static void writeINode(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2, int x3);
+    static void writeINodeInv(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2, int x3);
     static bool iCellHasSolid(const SPtr<BCArray3D> bcArray, int x1, int x2, int x3);
     static int iCellHowManySolids(const SPtr<BCArray3D> bcArray, int x1, int x2, int x3);
 
     bool findNeighborICell(const SPtr<BCArray3D> bcArray, SPtr<DistributionArray3D> f, D3Q27ICell &icell, int maxX1,
-                           int maxX2, int maxX3, int x1, int x2, int x3, LBMReal &xoff, LBMReal &yoff, LBMReal &zoff);
+                           int maxX2, int maxX3, int x1, int x2, int x3, real &xoff, real &yoff, real &zoff);
 
 protected:
-    virtual void calcInterpolatedCoefficiets(const D3Q27ICell &icell, LBMReal omega, LBMReal eps_new) {}
-    virtual void calcInterpolatedNodeFC(LBMReal *f, LBMReal omega) {}
-    virtual void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal &vx1, LBMReal &vx2, LBMReal &vx3) {}
-    virtual void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z, LBMReal &tauxx, LBMReal &tauyy,
-                                             LBMReal &tauzz, LBMReal &tauxy, LBMReal &tauxz, LBMReal &tauyz)
+    virtual void calcInterpolatedCoefficiets(const D3Q27ICell &icell, real omega, real eps_new) {}
+    virtual void calcInterpolatedNodeFC(real *f, real omega) {}
+    virtual void calcInterpolatedVelocity(real x, real y, real z, real &vx1, real &vx2, real &vx3) {}
+    virtual void calcInterpolatedShearStress(real x, real y, real z, real &tauxx, real &tauyy,
+                                             real &tauzz, real &tauxy, real &tauxz, real &tauyz)
     {
     }
-    virtual void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) {}
+    virtual void setOffsets(real xoff, real yoff, real zoff) {}
     friend class InterpolationHelper;
 
 private:
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
index 3c588e1506d8649149daad5588e2290c0832334a..0f9a9a96586268c872562e4d2ddfab5ef8e6377c 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
@@ -53,22 +53,22 @@ void LBMKernel::setBCProcessor(SPtr<BCProcessor> bcp) { bcProcessor = bcp; }
 //////////////////////////////////////////////////////////////////////////
 SPtr<BCProcessor> LBMKernel::getBCProcessor() const { return bcProcessor; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setCollisionFactor(double collFactor) { this->collFactor = collFactor; }
+void LBMKernel::setCollisionFactor(real collFactor) { this->collFactor = collFactor; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getCollisionFactor() const { return collFactor; }
+real LBMKernel::getCollisionFactor() const { return collFactor; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setForcingX1(LBMReal forcingX1)
+void LBMKernel::setForcingX1(real forcingX1)
 {
     this->muForcingX1.SetExpr(UbSystem::toString(forcingX1, LBMRealLim::digits10));
     this->checkFunction(muForcingX1);
 }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setForcingX2(LBMReal forcingX2)
+void LBMKernel::setForcingX2(real forcingX2)
 {
     this->muForcingX2.SetExpr(UbSystem::toString(forcingX2, LBMRealLim::digits10));
     this->checkFunction(muForcingX2);
 }
-void LBMKernel::setForcingX3(LBMReal forcingX3)
+void LBMKernel::setForcingX3(real forcingX3)
 {
     this->muForcingX3.SetExpr(UbSystem::toString(forcingX3, LBMRealLim::digits10));
     this->checkFunction(muForcingX3);
@@ -111,7 +111,7 @@ void LBMKernel::setForcingX3(const std::string &muParserString)
 //////////////////////////////////////////////////////////////////////////
 void LBMKernel::checkFunction(mu::Parser fct)
 {
-    double x1 = 1.0, x2 = 1.0, x3 = 1.0, dt = 1.0, nue = 1.0, rho = 1.0;
+    real x1 = 1.0, x2 = 1.0, x3 = 1.0, dt = 1.0, nue = 1.0, rho = 1.0;
     fct.DefineVar("x1", &x1);
     fct.DefineVar("x2", &x2);
     fct.DefineVar("x3", &x3);
@@ -141,9 +141,9 @@ void LBMKernel::setIndex(int x1, int x2, int x3)
 //////////////////////////////////////////////////////////////////////////
 SPtr<DataSet3D> LBMKernel::getDataSet() const { return this->dataSet; }
 //////////////////////////////////////////////////////////////////////////
-LBMReal LBMKernel::getDeltaT() const { return this->deltaT; }
+real LBMKernel::getDeltaT() const { return this->deltaT; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setDeltaT(LBMReal dt) { deltaT = dt; }
+void LBMKernel::setDeltaT(real dt) { deltaT = dt; }
 //////////////////////////////////////////////////////////////////////////
 bool LBMKernel::getCompressible() const { return compressible; }
 //////////////////////////////////////////////////////////////////////////
@@ -188,49 +188,49 @@ bool LBMKernel::isInsideOfDomain(const int &x1, const int &x2, const int &x3) co
 }
 //////////////////////////////////////////////////////////////////////////
 
-void LBMKernel::setCollisionFactorMultiphase(double collFactorL, double collFactorG)
+void LBMKernel::setCollisionFactorMultiphase(real collFactorL, real collFactorG)
 {
     this->collFactorL = collFactorL;
     this->collFactorG = collFactorG;
 }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getCollisionFactorL() const { return collFactorL; }
+real LBMKernel::getCollisionFactorL() const { return collFactorL; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getCollisionFactorG() const { return collFactorG; }
+real LBMKernel::getCollisionFactorG() const { return collFactorG; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setDensityRatio(double densityRatio) { this->densityRatio = densityRatio; }
+void LBMKernel::setDensityRatio(real densityRatio) { this->densityRatio = densityRatio; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getDensityRatio() const { return densityRatio; }
+real LBMKernel::getDensityRatio() const { return densityRatio; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setMultiphaseModelParameters(LBMReal beta, LBMReal kappa)
+void LBMKernel::setMultiphaseModelParameters(real beta, real kappa)
 {
     this->beta  = beta;
     this->kappa = kappa;
 }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::getMultiphaseModelParameters(LBMReal &beta, LBMReal &kappa)
+void LBMKernel::getMultiphaseModelParameters(real &beta, real &kappa)
 {
     beta  = this->beta;
     kappa = this->kappa;
 }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setContactAngle(double contactAngle) { this->contactAngle = contactAngle; }
+void LBMKernel::setContactAngle(real contactAngle) { this->contactAngle = contactAngle; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getContactAngle() const { return contactAngle; }
+real LBMKernel::getContactAngle() const { return contactAngle; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setPhiL(double phiL) { this->phiL = phiL; }
+void LBMKernel::setPhiL(real phiL) { this->phiL = phiL; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setPhiH(double phiH) { this->phiH = phiH; }
+void LBMKernel::setPhiH(real phiH) { this->phiH = phiH; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getPhiL() const { return phiL; }
+real LBMKernel::getPhiL() const { return phiL; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getPhiH() const { return phiH; }
+real LBMKernel::getPhiH() const { return phiH; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setPhaseFieldRelaxation(double tauH) { this->tauH = tauH; }
+void LBMKernel::setPhaseFieldRelaxation(real tauH) { this->tauH = tauH; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getPhaseFieldRelaxation() const { return tauH; }
+real LBMKernel::getPhaseFieldRelaxation() const { return tauH; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setMobility(double mob) { this->mob = mob; }
+void LBMKernel::setMobility(real mob) { this->mob = mob; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setInterfaceWidth(double w) { this->interfaceWidth = w; }
+void LBMKernel::setInterfaceWidth(real w) { this->interfaceWidth = w; }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
index d5eb02d4c0ef310cdfd63d283abc9719996e5f84..bc12a1ed93ffc241f4e121207376e44533908259 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
@@ -49,7 +49,7 @@ class Block3D;
 class LBMKernel : public ILBMKernel, public enableSharedFromThis<LBMKernel>
 {
 public:
-    using LBMRealLim = std::numeric_limits<LBMReal>;
+    using LBMRealLim = std::numeric_limits<real>;
 
 public:
     LBMKernel();
@@ -57,13 +57,13 @@ public:
     virtual SPtr<LBMKernel> clone() = 0;
 
     void calculate(int step) override    = 0;
-    double getCalculationTime() override = 0;
+    real getCalculationTime() override = 0;
 
     void setBCProcessor(SPtr<BCProcessor> bcp) override;
     SPtr<BCProcessor> getBCProcessor() const override;
 
-    void setCollisionFactor(double collFactor) override;
-    double getCollisionFactor() const override;
+    void setCollisionFactor(real collFactor) override;
+    real getCollisionFactor() const override;
 
     void setGhostLayerWidth(int witdh);
     int getGhostLayerWidth() const override;
@@ -71,9 +71,9 @@ public:
     void setDataSet(SPtr<DataSet3D> dataSet);
     SPtr<DataSet3D> getDataSet() const override;
 
-    void setForcingX1(LBMReal forcingX1);
-    void setForcingX2(LBMReal forcingX2);
-    void setForcingX3(LBMReal forcingX3);
+    void setForcingX1(real forcingX1);
+    void setForcingX2(real forcingX2);
+    void setForcingX3(real forcingX3);
 
     void setForcingX1(const mu::Parser &parser);
     void setForcingX2(const mu::Parser &parser);
@@ -85,8 +85,8 @@ public:
 
     void setIndex(int x1, int x2, int x3);
 
-    LBMReal getDeltaT() const override;
-    void setDeltaT(LBMReal dt);
+    real getDeltaT() const override;
+    void setDeltaT(real dt);
 
     bool getCompressible() const override;
     void setCompressible(bool val);
@@ -112,28 +112,28 @@ public:
 
     ///////// Extra methods for the multiphase kernel ////////////
 
-    void setCollisionFactorMultiphase(double collFactorL, double collFactorG);
-    double getCollisionFactorL() const;
-    double getCollisionFactorG() const;
-    void setDensityRatio(double densityRatio);
-    double getDensityRatio() const;
-    void setMultiphaseModelParameters(LBMReal beta, LBMReal kappa);
-    void getMultiphaseModelParameters(LBMReal &beta, LBMReal &kappa);
-    void setContactAngle(double contactAngle);
-    double getContactAngle() const;
-    void setPhiL(double phiL);
-    void setPhiH(double phiH);
-    double getPhiL() const;
-    double getPhiH() const;
-    void setPhaseFieldRelaxation(double tauH);
-    double getPhaseFieldRelaxation() const;
-    void setMobility(double mob);
-    void setInterfaceWidth(double w);
+    void setCollisionFactorMultiphase(real collFactorL, real collFactorG);
+    real getCollisionFactorL() const;
+    real getCollisionFactorG() const;
+    void setDensityRatio(real densityRatio);
+    real getDensityRatio() const;
+    void setMultiphaseModelParameters(real beta, real kappa);
+    void getMultiphaseModelParameters(real &beta, real &kappa);
+    void setContactAngle(real contactAngle);
+    real getContactAngle() const;
+    void setPhiL(real phiL);
+    void setPhiH(real phiH);
+    real getPhiL() const;
+    real getPhiH() const;
+    void setPhaseFieldRelaxation(real tauH);
+    real getPhaseFieldRelaxation() const;
+    void setMobility(real mob);
+    void setInterfaceWidth(real w);
 
 protected:
     SPtr<DataSet3D> dataSet;
     SPtr<BCProcessor> bcProcessor;
-    LBMReal collFactor;
+    real collFactor;
     int ghostLayerWidth{ 1 };
     bool compressible{ false };
 
@@ -143,7 +143,7 @@ protected:
     mu::Parser muForcingX2;
     mu::Parser muForcingX3;
     int ix1, ix2, ix3;
-    LBMReal deltaT{ 1.0 };
+    real deltaT{ 1.0 };
 
     // sponge layer
     bool withSpongeLayer{ false };
@@ -154,17 +154,17 @@ protected:
     std::array<int, 3> nx;
 
     // Multiphase model
-    LBMReal collFactorL;
-    LBMReal collFactorG;
-    LBMReal densityRatio;
-    LBMReal beta;
-    LBMReal kappa;
-    LBMReal contactAngle;
-    LBMReal phiL;
-    LBMReal phiH;
-    LBMReal tauH;
-    LBMReal mob;
-    LBMReal interfaceWidth { 4.0 };
+    real collFactorL;
+    real collFactorG;
+    real densityRatio;
+    real beta;
+    real kappa;
+    real contactAngle;
+    real phiL;
+    real phiH;
+    real tauH;
+    real mob;
+    real interfaceWidth { 4.0 };
 
 private:
     void checkFunction(mu::Parser fct);
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp
index 1fcdf118fa920d648b511c60ebbc48542e164be0..081e9fe8ecf850957dc49229379f4e112aa38c17 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp
@@ -5,10 +5,13 @@
 #include "DataSet3D.h"
 #include "BCProcessor.h"
 #include "BCArray3D.h"
+#include "lbm/constants/NumericConstants.h"
+
+using namespace vf::lbm::constant;
+//using namespace UbMath;
 
 //#define PROOF_CORRECTNESS
 
-using namespace UbMath;
 
 //////////////////////////////////////////////////////////////////////////
 LBMKernelETD3Q27BGK::LBMKernelETD3Q27BGK() 
@@ -42,6 +45,7 @@ SPtr<LBMKernel> LBMKernelETD3Q27BGK::clone()
 void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    //initializing of forcing stuff 
    if (withForcing)
@@ -60,9 +64,9 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
    zeroDistributions = std::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions();
 
    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal drho,vx1,vx2,vx3;
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
+   real drho,vx1,vx2,vx3;
    const int bcArrayMaxX1 = (int)bcArray->getNX1();
    const int bcArrayMaxX2 = (int)bcArray->getNX2();
    const int bcArrayMaxX3 = (int)bcArray->getNX3();
@@ -136,7 +140,7 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
                + f[DIR_0MP] + f[DIR_PPP] + f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] - f[DIR_PPM] - f[DIR_MMM] - f[DIR_PMM] 
                - f[DIR_MPM];
 
-               LBMReal cu_sq=1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
+               real cu_sq=1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
 
                feq[DIR_000] =  c8o27*(drho-cu_sq);
                feq[DIR_P00] =  c2o27*(drho+3.0*( vx1   )+c9o2*( vx1   )*( vx1   )-cu_sq);
@@ -238,11 +242,11 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
                }
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = f[REST] + f[DIR_P00] + f[W] + f[N] + f[S] + f[T] + f[B] 
+               real rho_post = f[REST] + f[DIR_P00] + f[W] + f[N] + f[S] + f[T] + f[B] 
                + f[NE] + f[SW] + f[SE] + f[NW] + f[TE] + f[BW] + f[BE]
                + f[TW] + f[TN] + f[BS] + f[BN] + f[TS] + f[TNE] + f[TSW]
                + f[TSE] + f[TNW] + f[BNE] + f[BSW] + f[BSE] + f[BNW];
-               LBMReal dif = drho - rho_post;
+               real dif = drho - rho_post;
 #ifdef SINGLEPRECISION
                if(dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -255,35 +259,35 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
                //////////////////////////////////////////////////////////////////////////
                //write distribution
                //////////////////////////////////////////////////////////////////////////
-               (*this->localDistributions)(D3Q27System::ET_E,x1,  x2,  x3) = f[D3Q27System::INV_P00];
-               (*this->localDistributions)(D3Q27System::ET_N,x1,  x2,  x3) = f[D3Q27System::INV_0P0];
-               (*this->localDistributions)(D3Q27System::ET_T,x1,  x2,  x3) = f[D3Q27System::INV_00P];
-               (*this->localDistributions)(D3Q27System::ET_NE,x1,  x2,  x3) = f[D3Q27System::INV_PP0];
-               (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,  x3) = f[D3Q27System::INV_MP0];
-               (*this->localDistributions)(D3Q27System::ET_TE,x1,  x2,  x3) = f[D3Q27System::INV_P0P];
-               (*this->localDistributions)(D3Q27System::ET_TW,x1p,x2,  x3) = f[D3Q27System::INV_M0P];
-               (*this->localDistributions)(D3Q27System::ET_TN,x1,  x2,  x3) = f[D3Q27System::INV_0PP];
-               (*this->localDistributions)(D3Q27System::ET_TS,x1,  x2p,x3) = f[D3Q27System::INV_0MP];
-               (*this->localDistributions)(D3Q27System::ET_TNE,x1,  x2,  x3) = f[D3Q27System::INV_PPP];
-               (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,  x3) = f[D3Q27System::INV_MPP];
-               (*this->localDistributions)(D3Q27System::ET_TSE,x1,  x2p,x3) = f[D3Q27System::INV_PMP];
-               (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3) = f[D3Q27System::INV_MMP];
+               (*this->localDistributions)(D3Q27System::ET_E,x1,  x2,  x3) = f[INV_P00];
+               (*this->localDistributions)(D3Q27System::ET_N,x1,  x2,  x3) = f[INV_0P0];
+               (*this->localDistributions)(D3Q27System::ET_T,x1,  x2,  x3) = f[INV_00P];
+               (*this->localDistributions)(D3Q27System::ET_NE,x1,  x2,  x3) = f[INV_PP0];
+               (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,  x3) = f[INV_MP0];
+               (*this->localDistributions)(D3Q27System::ET_TE,x1,  x2,  x3) = f[INV_P0P];
+               (*this->localDistributions)(D3Q27System::ET_TW,x1p,x2,  x3) = f[INV_M0P];
+               (*this->localDistributions)(D3Q27System::ET_TN,x1,  x2,  x3) = f[INV_0PP];
+               (*this->localDistributions)(D3Q27System::ET_TS,x1,  x2p,x3) = f[INV_0MP];
+               (*this->localDistributions)(D3Q27System::ET_TNE,x1,  x2,  x3) = f[INV_PPP];
+               (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,  x3) = f[INV_MPP];
+               (*this->localDistributions)(D3Q27System::ET_TSE,x1,  x2p,x3) = f[INV_PMP];
+               (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3) = f[INV_MMP];
 
-               (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,  x3    ) = f[D3Q27System::INV_M00 ];
-               (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,  x2p,x3    ) = f[D3Q27System::INV_0M0 ];
-               (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,  x2,  x3p  ) = f[D3Q27System::INV_00M ];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3   ) = f[D3Q27System::INV_MM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,  x2p,x3   ) = f[D3Q27System::INV_PM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,  x3p ) = f[D3Q27System::INV_M0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,  x2,  x3p ) = f[D3Q27System::INV_P0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,  x2p,x3p ) = f[D3Q27System::INV_0MM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,  x2,  x3p ) = f[D3Q27System::INV_0PM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p) = f[D3Q27System::INV_MMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,  x2p,x3p) = f[D3Q27System::INV_PMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,  x3p) = f[D3Q27System::INV_MPM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,  x2,  x3p) = f[D3Q27System::INV_PPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,  x3    ) = f[INV_M00 ];
+               (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,  x2p,x3    ) = f[INV_0M0 ];
+               (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,  x2,  x3p  ) = f[INV_00M ];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3   ) = f[INV_MM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,  x2p,x3   ) = f[INV_PM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,  x3p ) = f[INV_M0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,  x2,  x3p ) = f[INV_P0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,  x2p,x3p ) = f[INV_0MM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,  x2,  x3p ) = f[INV_0PM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p) = f[INV_MMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,  x2p,x3p) = f[INV_PMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,  x3p) = f[INV_MPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,  x2,  x3p) = f[INV_PPM];
 
-               (*this->zeroDistributions)(x1,x2,x3) = f[D3Q27System::DIR_000];
+               (*this->zeroDistributions)(x1,x2,x3) = f[DIR_000];
                //////////////////////////////////////////////////////////////////////////
 
 
@@ -293,7 +297,7 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
    }
 }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernelETD3Q27BGK::getCalculationTime()
+real LBMKernelETD3Q27BGK::getCalculationTime()
 {
    return 0.0;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h
index 09e495c2375b8f009f2a231ca4a762437031303b..c02725698d64e129f2fc8d5858d8598b8db6682f 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h
@@ -14,21 +14,21 @@ public:
    ~LBMKernelETD3Q27BGK() override;
    void calculate(int step)override;
    SPtr<LBMKernel> clone()override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
 
 private:
    void initDataSet();
    //void collideAllCompressible();
    //void collideAllIncompressible();
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 
 
 };
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h b/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h
index de485c28da920b150476ad8e7b4e1f03019e132e..8f4feed79fb546289ebda2ae49439144e5a9e388 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h
@@ -67,17 +67,17 @@ public:
 
     LBMUnitConverter() = default;
 
-    LBMUnitConverter(const double &refLengthWorld, const double &csWorld, const double &rhoWorld,
-                     const double &refLengthLb, const double &csLb = 1.0 / std::sqrt(3.0), const double &rhoLb = 1.0)
+    LBMUnitConverter(const real &refLengthWorld, const real &csWorld, const real &rhoWorld,
+                     const real &refLengthLb, const real &csLb = 1.0 / std::sqrt(3.0), const real &rhoLb = 1.0)
     {
         this->init(refLengthWorld, csWorld, rhoWorld, csWorld, refLengthLb, rhoLb, csLb);
     }
 
-    LBMUnitConverter(const double &refLengthWorld, WORLD_MATERIAL worldMaterial, const double &refLengthLb,
-                     const double &csLb = 1.0 / std::sqrt(3.0), const double &rhoLb = 1.0)
+    LBMUnitConverter(const real &refLengthWorld, WORLD_MATERIAL worldMaterial, const real &refLengthLb,
+                     const real &csLb = 1.0 / std::sqrt(3.0), const real &rhoLb = 1.0)
     {
-        double csWorld;
-        double rhoWorld;
+        real csWorld;
+        real rhoWorld;
 
         if (worldMaterial == WATER) {
             csWorld  = 1484 /*m/s*/;
@@ -99,39 +99,39 @@ public:
 
     virtual ~LBMUnitConverter() = default;
 
-    double getRefRhoLb() { return refRhoLb; }
+    real getRefRhoLb() { return refRhoLb; }
 
-    double getFactorLentghLbToW() { return factorLengthLbToW; }
-    double getFactorLentghWToLb() { return 1.0 / this->getFactorLentghLbToW(); }
+    real getFactorLentghLbToW() { return factorLengthLbToW; }
+    real getFactorLentghWToLb() { return 1.0 / this->getFactorLentghLbToW(); }
 
-    double getFactorTimeLbToW() { return factorTimeLbToW; }
-    double getFactorTimeWToLb() { return 1.0 / this->getFactorTimeLbToW(); }
+    real getFactorTimeLbToW() { return factorTimeLbToW; }
+    real getFactorTimeWToLb() { return 1.0 / this->getFactorTimeLbToW(); }
 
-    double getFactorVelocityLbToW() { return factorLengthLbToW / factorTimeLbToW; }
-    double getFactorVelocityWToLb() { return 1.0 / this->getFactorVelocityLbToW(); }
+    real getFactorVelocityLbToW() { return factorLengthLbToW / factorTimeLbToW; }
+    real getFactorVelocityWToLb() { return 1.0 / this->getFactorVelocityLbToW(); }
 
-    double getFactorViscosityLbToW() { return factorLengthLbToW * factorLengthLbToW / factorTimeLbToW; }
-    double getFactorViscosityWToLb() { return 1.0 / this->getFactorViscosityLbToW(); }
+    real getFactorViscosityLbToW() { return factorLengthLbToW * factorLengthLbToW / factorTimeLbToW; }
+    real getFactorViscosityWToLb() { return 1.0 / this->getFactorViscosityLbToW(); }
 
-    double getFactorDensityLbToW() { return this->factorMassLbToW / std::pow(factorLengthLbToW, 3.0); }
-    double getFactorDensityWToLb() { return 1.0 / this->getFactorDensityLbToW(); }
+    real getFactorDensityLbToW() { return this->factorMassLbToW / std::pow(factorLengthLbToW, 3.0); }
+    real getFactorDensityWToLb() { return 1.0 / this->getFactorDensityLbToW(); }
 
-    double getFactorPressureLbToW(){ return this->factorMassLbToW / (factorLengthLbToW * factorTimeLbToW * factorTimeLbToW); }
-    double getFactorPressureWToLb() { return 1.0 / this->getFactorPressureLbToW(); }
+    real getFactorPressureLbToW(){ return this->factorMassLbToW / (factorLengthLbToW * factorTimeLbToW * factorTimeLbToW); }
+    real getFactorPressureWToLb() { return 1.0 / this->getFactorPressureLbToW(); }
 
-    double getFactorMassLbToW() { return this->factorMassLbToW; }
-    double getFactorMassWToLb() { return 1.0 / this->getFactorMassLbToW(); }
+    real getFactorMassLbToW() { return this->factorMassLbToW; }
+    real getFactorMassWToLb() { return 1.0 / this->getFactorMassLbToW(); }
 
-    double getFactorForceLbToW() { return factorMassLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
-    double getFactorForceWToLb() { return 1.0 / this->getFactorForceLbToW(); }
+    real getFactorForceLbToW() { return factorMassLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
+    real getFactorForceWToLb() { return 1.0 / this->getFactorForceLbToW(); }
 
-    double getFactorTorqueLbToW() { return factorMassLbToW * factorLengthLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW);}
-    double getFactorTorqueWToLb() { return 1.0 / this->getFactorTorqueLbToW(); }
+    real getFactorTorqueLbToW() { return factorMassLbToW * factorLengthLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW);}
+    real getFactorTorqueWToLb() { return 1.0 / this->getFactorTorqueLbToW(); }
 
-    double getFactorAccLbToW() { return factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
-    double getFactorAccWToLb() { return 1.0 / this->getFactorAccLbToW(); }
+    real getFactorAccLbToW() { return factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
+    real getFactorAccWToLb() { return 1.0 / this->getFactorAccLbToW(); }
 
-    double getFactorTimeLbToW(double deltaX) const { return factorTimeWithoutDx * deltaX; }
+    real getFactorTimeLbToW(real deltaX) const { return factorTimeWithoutDx * deltaX; }
 
 
     /*==========================================================*/
@@ -185,8 +185,8 @@ public:
         return out.str();
     }
 
-    void init(const double &refLengthWorld, const double & /*csWorld*/, const double &rhoWorld, const double &vWorld,
-              const double &refLengthLb, const double &rhoLb, const double &vLb)
+    void init(const real &refLengthWorld, const real & /*csWorld*/, const real &rhoWorld, const real &vWorld,
+              const real &refLengthLb, const real &rhoLb, const real &vLb)
     {
         factorLengthLbToW   = refLengthWorld / refLengthLb;
         factorTimeLbToW     = vLb / vWorld * factorLengthLbToW;
@@ -196,11 +196,11 @@ public:
     }
 
 protected:
-    double factorLengthLbToW{ 1.0 };
-    double factorTimeLbToW{ 1.0 };
-    double factorMassLbToW{ 1.0 };
-    double refRhoLb{ 1.0 };
-    double factorTimeWithoutDx{ 0.0 };
+    real factorLengthLbToW{ 1.0 };
+    real factorTimeLbToW{ 1.0 };
+    real factorMassLbToW{ 1.0 };
+    real refRhoLb{ 1.0 };
+    real factorTimeWithoutDx{ 0.0 };
 };
 
 #endif // LBMUNITCONVERTER_H
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp
index ad80b372251a11161de68c6935097da8eec3edc5..3b660f6dcb985be987d0c0d46f2b29bc15fab468 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp
@@ -83,7 +83,9 @@ SPtr<LBMKernel> MultiphaseCumulantLBMKernel::clone()
 void MultiphaseCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+//    using namespace UbMath;
+    using namespace vf::lbm::dir;
+    using namespace vf::lbm::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
@@ -111,10 +113,10 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 
         for (int x3 = 0; x3 <= maxX3; x3++) {
@@ -125,34 +127,34 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
                                                     (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) +
                                                     (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) +
@@ -162,8 +164,8 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
-        LBMReal forcingTerm[D3Q27System::ENDF + 1];
+        real collFactorM;
+        real forcingTerm[D3Q27System::ENDF + 1];
 
         for (int x3 = minX3; x3 < maxX3; x3++) {
             for (int x2 = minX2; x2 < maxX2; x2++) {
@@ -196,52 +198,52 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         findNeighbors(phaseField, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
-
-                        LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
+
+                        real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
                         collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
                         if (withForcing) {
                             // muX1 = static_cast<double>(x1-1+ix1*maxX1);
@@ -258,7 +260,7 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                             forcingX2 = muForcingX2.Eval();
                             forcingX3 = muForcingX3.Eval();
 
-                            LBMReal rho_m = 1.0 / densityRatio;
+                            real rho_m = 1.0 / densityRatio;
                             forcingX1     = forcingX1 * (rho - rho_m);
                             forcingX2     = forcingX2 * (rho - rho_m);
                             forcingX3     = forcingX3 * (rho - rho_m);
@@ -268,19 +270,19 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                             // uz += forcingX3*deltaT*0.5; // Z
                         }
 
-                        LBMReal ux = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                        real ux = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                                       (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
                                       (mfcbb - mfabb)) /
                                          (rho * c1o3) +
                                      (mu * dX1_phi + forcingX1) / (2 * rho);
 
-                        LBMReal uy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                        real uy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
                                       (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
                                       (mfbcb - mfbab)) /
                                          (rho * c1o3) +
                                      (mu * dX2_phi + forcingX2) / (2 * rho);
 
-                        LBMReal uz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                        real uz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
                                       (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
                                       (mfbbc - mfbba)) /
                                          (rho * c1o3) +
@@ -288,17 +290,17 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         //--------------------------------------------------------
 
-                        LBMReal ux2 = ux * ux;
-                        LBMReal uy2 = uy * uy;
-                        LBMReal uz2 = uz * uz;
+                        real ux2 = ux * ux;
+                        real uy2 = uy * uy;
+                        real uz2 = uz * uz;
 
                         //----------- Calculating Forcing Terms * -------------
                         for (int dir = FSTARTDIR; dir <= FENDDIR; dir++) {
-                            LBMReal velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
-                            LBMReal velSq1  = velProd * velProd;
-                            LBMReal gamma = WEIGTH[dir] * (1.0 + 3 * velProd + 4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2));
+                            real velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
+                            real velSq1  = velProd * velProd;
+                            real gamma = WEIGTH[dir] * (1.0 + 3 * velProd + 4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2));
 
-                            LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
+                            real fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
 
                             forcingTerm[dir] = ((-ux) * (fac1 * dX1_phi + gamma * (mu * dX1_phi + forcingX1)) +
                                                 (-uy) * (fac1 * dX2_phi + gamma * (mu * dX2_phi + forcingX2)) +
@@ -308,8 +310,8 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                                                (DX3[dir]) * (fac1 * dX3_phi + gamma * (mu * dX3_phi + forcingX3));
                         }
 
-                        LBMReal gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
-                        LBMReal fac1      = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
+                        real gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
+                        real fac1      = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
                         forcingTerm[DIR_000] = (-ux) * (fac1 * dX1_phi + gamma * (mu * dX1_phi + forcingX1)) +
                                             (-uy) * (fac1 * dX2_phi + gamma * (mu * dX2_phi + forcingX2)) +
                                             (-uz) * (fac1 * dX3_phi + gamma * (mu * dX3_phi + forcingX3));
@@ -344,13 +346,13 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         mfcca = 3.0 * (mfcca + 0.5 * forcingTerm[DIR_PPM]) / rho;  //-(3.0*p1 - rho)*WEIGTH[BNE];
                         mfbbb = 3.0 * (mfbbb + 0.5 * forcingTerm[DIR_000]) / rho; //- (3.0*p1 - rho)*WEIGTH[REST];
 
-                        LBMReal rho1 = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
+                        real rho1 = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
                                        (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) +
                                        (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) + (mfbab + mfbcb) +
                                        (mfbba + mfbbc) + mfbbb;
 
 
-                        LBMReal oMdrho, m0, m1, m2;
+                        real oMdrho, m0, m1, m2;
 
                         oMdrho = mfccc + mfaaa;
                         m0     = mfaca + mfcac;
@@ -380,8 +382,8 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         // oMdrho = rho - (oMdrho + m0);
 
                         ////////////////////////////////////////////////////////////////////////////////////
-                        LBMReal wadjust;
-                        LBMReal qudricLimit = 0.01;
+                        real wadjust;
+                        real qudricLimit = 0.01;
                         ////////////////////////////////////////////////////////////////////////////////////
                         // Hin
                         ////////////////////////////////////////////////////////////////////////////////////
@@ -612,41 +614,41 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         ////////////////////////////////////////////////////////////////////////////////////
                         // Cumulants
                         ////////////////////////////////////////////////////////////////////////////////////
-                        LBMReal OxxPyyPzz = 1.; // omega2 or bulk viscosity
-                        LBMReal OxyyPxzz  = 1.; //-s9;//2+s9;//
-                        LBMReal OxyyMxzz  = 1.; // 2+s9;//
-                        LBMReal O4        = 1.;
-                        LBMReal O5        = 1.;
-                        LBMReal O6        = 1.;
+                        real OxxPyyPzz = 1.; // omega2 or bulk viscosity
+                        real OxyyPxzz  = 1.; //-s9;//2+s9;//
+                        real OxyyMxzz  = 1.; // 2+s9;//
+                        real O4        = 1.;
+                        real O5        = 1.;
+                        real O6        = 1.;
 
                         // Cum 4.
-                        LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-                        LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-                        LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+                        real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+                        real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+                        real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-                        LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) +
+                        real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) +
                                                   c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-                        LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) +
+                        real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) +
                                                   c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-                        LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) +
+                        real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) +
                                                   c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
 
                         // Cum 5.
-                        LBMReal CUMbcc = mfbcc -
+                        real CUMbcc = mfbcc -
                                          (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb +
                                           2. * (mfbab * mfacb + mfbba * mfabc)) -
                                          c1o3 * (mfbca + mfbac) * oMdrho;
-                        LBMReal CUMcbc = mfcbc -
+                        real CUMcbc = mfcbc -
                                          (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb +
                                           2. * (mfabb * mfcab + mfbba * mfbac)) -
                                          c1o3 * (mfcba + mfabc) * oMdrho;
-                        LBMReal CUMccb = mfccb -
+                        real CUMccb = mfccb -
                                          (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb +
                                           2. * (mfbab * mfbca + mfabb * mfcba)) -
                                          c1o3 * (mfacb + mfcab) * oMdrho;
 
                         // Cum 6.
-                        LBMReal CUMccc =
+                        real CUMccc =
                             mfccc +
                             ((-4. * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
                               4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
@@ -663,13 +665,13 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         // 2.
                         // linear combinations
-                        LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-                        LBMReal mxxMyy    = mfcaa - mfaca;
-                        LBMReal mxxMzz    = mfcaa - mfaac;
+                        real mxxPyyPzz = mfcaa + mfaca + mfaac;
+                        real mxxMyy    = mfcaa - mfaca;
+                        real mxxMzz    = mfcaa - mfaac;
 
-                        LBMReal dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-                        LBMReal dyuy = dxux + collFactorM * c3o2 * mxxMyy;
-                        LBMReal dzuz = dxux + collFactorM * c3o2 * mxxMzz;
+                        real dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                        real dyuy = dxux + collFactorM * c3o2 * mxxMyy;
+                        real dzuz = dxux + collFactorM * c3o2 * mxxMzz;
 
                         (*divU)(x1, x2, x3) = dxux + dyuy + dzuz;
 
@@ -690,14 +692,14 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         // 3.
                         // linear combinations
-                        LBMReal mxxyPyzz = mfcba + mfabc;
-                        LBMReal mxxyMyzz = mfcba - mfabc;
+                        real mxxyPyzz = mfcba + mfabc;
+                        real mxxyMyzz = mfcba - mfabc;
 
-                        LBMReal mxxzPyyz = mfcab + mfacb;
-                        LBMReal mxxzMyyz = mfcab - mfacb;
+                        real mxxzPyyz = mfcab + mfacb;
+                        real mxxzMyyz = mfcab - mfacb;
 
-                        LBMReal mxyyPxzz = mfbca + mfbac;
-                        LBMReal mxyyMxzz = mfbca - mfbac;
+                        real mxyyPxzz = mfbca + mfbac;
+                        real mxyyMxzz = mfbca - mfbac;
 
                         // relax
                         wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1004,12 +1006,12 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         // proof correctness
                         //////////////////////////////////////////////////////////////////////////
 #ifdef PROOF_CORRECTNESS
-                        LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
+                        real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
                                            (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) +
                                            (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) + (mfbab + mfbcb) +
                                            (mfbba + mfbbc) + mfbbb;
 
-                        LBMReal dif = rho1 - rho_post;
+                        real dif = rho1 - rho_post;
 #ifdef SINGLEPRECISION
                         if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1122,13 +1124,13 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         h[DIR_000] = (*this->zeroDistributionsH)(x1, x2, x3);
 
                         for (int dir = STARTF; dir < (ENDF + 1); dir++) {
-                            LBMReal velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
-                            LBMReal velSq1  = velProd * velProd;
-                            LBMReal hEq; //, gEq;
+                            real velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
+                            real velSq1  = velProd * velProd;
+                            real hEq; //, gEq;
 
                             if (dir != DIR_000) {
-                                LBMReal dirGrad_phi = (phi[dir] - phi[INVDIR[dir]]) / 2.0;
-                                LBMReal hSource     = (tauH - 0.5) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * (dirGrad_phi) / denom; 
+                                real dirGrad_phi = (phi[dir] - phi[INVDIR[dir]]) / 2.0;
+                                real hSource     = (tauH - 0.5) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * (dirGrad_phi) / denom; 
                                 hEq = phi[DIR_000] * WEIGTH[dir] * (1.0 + 3.0 * velProd + 4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)) +                                 hSource * WEIGTH[dir];
 
                                 // This corresponds with the collision factor of 1.0 which equals (tauH + 0.5).
@@ -1140,35 +1142,35 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                             }
                         }
 
-                        (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3)     = h[D3Q27System::INV_P00];
-                        (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3)     = h[D3Q27System::INV_0P0];
-                        (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3)     = h[D3Q27System::INV_00P];
-                        (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3)    = h[D3Q27System::INV_PP0];
-                        (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3)   = h[D3Q27System::INV_MP0];
-                        (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3)    = h[D3Q27System::INV_P0P];
-                        (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3)   = h[D3Q27System::INV_M0P];
-                        (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3)    = h[D3Q27System::INV_0PP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3)   = h[D3Q27System::INV_0MP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3)   = h[D3Q27System::INV_PPP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3)  = h[D3Q27System::INV_MPP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3)  = h[D3Q27System::INV_PMP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3) = h[D3Q27System::INV_MMP];
-
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3)     = h[D3Q27System::INV_M00];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3)     = h[D3Q27System::INV_0M0];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p)     = h[D3Q27System::INV_00M];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3)   = h[D3Q27System::INV_MM0];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3)    = h[D3Q27System::INV_PM0];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p)   = h[D3Q27System::INV_M0M];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p)    = h[D3Q27System::INV_P0M];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p)   = h[D3Q27System::INV_0MM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p)    = h[D3Q27System::INV_0PM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p) = h[D3Q27System::INV_MMM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p)  = h[D3Q27System::INV_PMM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p)  = h[D3Q27System::INV_MPM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p)   = h[D3Q27System::INV_PPM];
-
-                        (*this->zeroDistributionsH)(x1, x2, x3) = h[D3Q27System::DIR_000];
+                        (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3)     = h[INV_P00];
+                        (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3)     = h[INV_0P0];
+                        (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3)     = h[INV_00P];
+                        (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3)    = h[INV_PP0];
+                        (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3)   = h[INV_MP0];
+                        (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3)    = h[INV_P0P];
+                        (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3)   = h[INV_M0P];
+                        (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3)    = h[INV_0PP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3)   = h[INV_0MP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3)   = h[INV_PPP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3)  = h[INV_MPP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3)  = h[INV_PMP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3) = h[INV_MMP];
+
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3)     = h[INV_M00];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3)     = h[INV_0M0];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p)     = h[INV_00M];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3)   = h[INV_MM0];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3)    = h[INV_PM0];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p)   = h[INV_M0M];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p)    = h[INV_P0M];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p)   = h[INV_0MM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p)    = h[INV_0PM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p) = h[INV_MMM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p)  = h[INV_PMM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p)  = h[INV_MPM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p)   = h[INV_PPM];
+
+                        (*this->zeroDistributionsH)(x1, x2, x3) = h[DIR_000];
 
                         /////////////////////   END OF OLD BGK SOLVER ///////////////////////////////
                     }
@@ -1180,40 +1182,42 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseCumulantLBMKernel::gradX1_phi()
+real MultiphaseCumulantLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX1[k] * phi[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal MultiphaseCumulantLBMKernel::gradX2_phi()
+real MultiphaseCumulantLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX2[k] * phi[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal MultiphaseCumulantLBMKernel::gradX3_phi()
+real MultiphaseCumulantLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX3[k] * phi[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal MultiphaseCumulantLBMKernel::nabla2_phi()
+real MultiphaseCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    using namespace vf::lbm::dir;
+
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * (phi[k] - phi[DIR_000]);
     }
@@ -1223,6 +1227,8 @@ LBMReal MultiphaseCumulantLBMKernel::nabla2_phi()
 void MultiphaseCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -1277,10 +1283,12 @@ void MultiphaseCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h
index fdc47f340dbfaadfd40f4f62885350a82f2cc202..1402e35f0626399c30875d3f58bbcd256367d965 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h
@@ -51,50 +51,50 @@ public:
    virtual ~MultiphaseCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
+   real h  [D3Q27System::ENDF+1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
 
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
    mu::value_type muRho;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp
index bd4df8aea33d26b3db75af3e00df564b7ded3efe..c294a381f6c4309577022ca16ee781775f22a31f 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp
@@ -39,6 +39,7 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include <cmath>
+#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
@@ -51,16 +52,16 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::initDataSet()
 	SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector( nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9)); // For phase-field
 
 	SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	dataSet->setFdistributions(f);
 	dataSet->setHdistributions(h); // For phase-field
 	dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
 
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 }
 //////////////////////////////////////////////////////////////////////////
 SPtr<LBMKernel> MultiphasePressureFilterCompressibleAirLBMKernel::clone()
@@ -91,23 +92,27 @@ SPtr<LBMKernel> MultiphasePressureFilterCompressibleAirLBMKernel::clone()
 	return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -115,20 +120,24 @@ void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardInverseChimeraWi
 
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
-void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -138,13 +147,15 @@ void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardChimera(LBMReal&
 void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 {
 	using namespace D3Q27System;
-	using namespace UbMath;
+//	using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::lbm::constant;
 
 	forcingX1 = 0.0;
 	forcingX2 = 0.0;
 	forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
 														 /////////////////////////////////////
 
 	localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -155,7 +166,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 	nonLocalDistributionsH1 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getNonLocalDistributions();
 	zeroDistributionsH1     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getZeroDistributions();
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -178,34 +189,34 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					int x2p = x2 + 1;
 					int x3p = x3 + 1;
 
-					LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 					(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
 						(((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
 							((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -242,16 +253,16 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					
-					LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH); //Incompressible
+					real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH); //Incompressible
 					//LBMReal rho = rhoH + rhoToPhi * ((*pressure)(x1, x2, x3) - phiH); //wrong?
 					//! variable density -> TRANSFER!
 					//LBMReal rho = rhoH * ((*phaseField)(x1, x2, x3)) + rhoL * ((*phaseField2)(x1, x2, x3));
@@ -266,7 +277,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 		}
 	}
 
-	LBMReal collFactorM;
+	real collFactorM;
 
 	////Periodic Filter
 	for (int x3 = minX3-1; x3 <= maxX3; x3++) {
@@ -274,7 +285,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 			for (int x1 = minX1-1; x1 <= maxX1; x1++) {
 				if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-					LBMReal sum = 0.;
+					real sum = 0.;
 
 					///Version for boundaries
 					for (int xx = -1; xx <= 1; xx++) {
@@ -290,9 +301,9 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 								int zzz = zz + x3;
 
 								if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-									sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
+									sum+= 64.0/(216.0*(c1o1+c3o1*abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
 								}
-								else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressureOld)(x1, x2, x3);
+								else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressureOld)(x1, x2, x3);
 								}
 
 
@@ -338,126 +349,126 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					findNeighbors(phaseField, x1, x2, x3);
 
-					LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
-
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-					LBMReal dX1_phi = gradX1_phi();
-					LBMReal dX2_phi = gradX2_phi();
-					LBMReal dX3_phi = gradX3_phi();
-
-					LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-					LBMReal normX1 = dX1_phi / denom;
-					LBMReal normX2 = dX2_phi / denom;
-					LBMReal normX3 = dX3_phi / denom;
+					real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
+
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+					real dX1_phi = gradX1_phi();
+					real dX2_phi = gradX2_phi();
+					real dX3_phi = gradX3_phi();
+
+					real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+					real normX1 = dX1_phi / denom;
+					real normX2 = dX2_phi / denom;
+					real normX3 = dX3_phi / denom;
 
 
 
 					collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-					LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+					real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
 					//----------- Calculating Macroscopic Values -------------
-					LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
+					real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
 					//LBMReal rho = rhoL + (rhoH - rhoL) * phi[REST] + (one - phi[REST]) * (*pressure)(x1, x2, x3) * three; //compressible
 
-					LBMReal m0, m1, m2;
-					LBMReal rhoRef=c1;
+					real m0, m1, m2;
+					real rhoRef=c1o1;
 
-					LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+					real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 						(mfcbb - mfabb))/rhoRef;
-					LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+					real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 						(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 						(mfbcb - mfbab))/rhoRef;
-					LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+					real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 						(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 						(mfbbc - mfbba))/rhoRef;
 
-					LBMReal gradPx = 0.0;
-					LBMReal gradPy = 0.0;
-					LBMReal gradPz = 0.0;
+					real gradPx = 0.0;
+					real gradPy = 0.0;
+					real gradPz = 0.0;
 					for (int dir1 = -1; dir1 <= 1; dir1++) {
 						for (int dir2 = -1; dir2 <= 1; dir2++) {
 							int yyy = x2 + dir1;
 							int zzz = x3 + dir2;
 							if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 + 1, yyy, zzz)) {
-								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							int xxx = x1 + dir1;
 							if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2+1, zzz)) {
-								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							yyy = x2 + dir2;
 							if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 						}
 					}
 
 					//Viscosity increase by pressure gradient
-					LBMReal errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
+					real errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
 					//LBMReal limVis = 0.0000001*10;//0.01;
 					// collFactorM =collFactorM/(c1+limVis*(errPhi*errPhi)*collFactorM);
 					// collFactorM = (collFactorM < 1.8) ? 1.8 : collFactorM;
@@ -483,14 +494,14 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					vvy += mu * dX2_phi * c1o2 / rho ;
 					vvz += mu * dX3_phi * c1o2 / rho;
 
-					LBMReal vx2;
-					LBMReal vy2;
-					LBMReal vz2;
+					real vx2;
+					real vy2;
+					real vz2;
 					vx2 = vvx * vvx;
 					vy2 = vvy * vvy;
 					vz2 = vvz * vvz;
 					///////////////////////////////////////////////////////////////////////////////////////////               
-					LBMReal oMdrho;
+					real oMdrho;
 
 
 					oMdrho = mfccc + mfaaa;
@@ -520,8 +531,8 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 															 ////////////////////////////////////////////////////////////////////////////////////
-					LBMReal wadjust;
-					LBMReal qudricLimit = 0.01;
+					real wadjust;
+					real qudricLimit = 0.01;
 					////////////////////////////////////////////////////////////////////////////////////
 					//Hin
 					////////////////////////////////////////////////////////////////////////////////////
@@ -754,21 +765,21 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					////////////////////////////////////////////////////////////////////////////////////
 
 					// mfaaa = 0.0;
-					LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+					real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 											//  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 											//  LBMReal OxyyMxzz  = 1.;//2+s9;//
-					LBMReal O4 = 1.;
-					LBMReal O5 = 1.;
-					LBMReal O6 = 1.;
+					real O4 = 1.;
+					real O5 = 1.;
+					real O6 = 1.;
 
 					/////fourth order parameters; here only for test. Move out of loop!
 
-					LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-					LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+					real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+					real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
 					//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-					LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-					LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 					//Cum 4.
@@ -776,21 +787,21 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 					//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-					LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-					LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-					LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+					real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+					real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+					real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+					real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 					//Cum 5.
-					LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-					LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-					LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+					real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+					real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+					real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 					//Cum 6.
-					LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+					real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 						- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 						- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 						- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -804,21 +815,21 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					//2.
 					// linear combinations
-					LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+					real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 					//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 					mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 										//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-					LBMReal mxxMyy = mfcaa - mfaca;
-					LBMReal mxxMzz = mfcaa - mfaac;
+					real mxxMyy = mfcaa - mfaca;
+					real mxxMzz = mfcaa - mfaac;
 
-					LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-					LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-					LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+					real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+					real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+					real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-					LBMReal Dxy = -three * collFactorM * mfbba;
-					LBMReal Dxz = -three * collFactorM * mfbab;
-					LBMReal Dyz = -three * collFactorM * mfabb;
+					real Dxy = -c3o1 * collFactorM * mfbba;
+					real Dxz = -c3o1 * collFactorM * mfbab;
+					real Dyz = -c3o1 * collFactorM * mfabb;
 
 					//relax
 					mxxPyyPzz += OxxPyyPzz * (/*mfaaa*/ - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -845,14 +856,14 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					//3.
 					// linear combinations
-					LBMReal mxxyPyzz = mfcba + mfabc;
-					LBMReal mxxyMyzz = mfcba - mfabc;
+					real mxxyPyzz = mfcba + mfabc;
+					real mxxyMyzz = mfcba - mfabc;
 
-					LBMReal mxxzPyyz = mfcab + mfacb;
-					LBMReal mxxzMyyz = mfcab - mfacb;
+					real mxxzPyyz = mfcab + mfacb;
+					real mxxzMyyz = mfcab - mfacb;
 
-					LBMReal mxyyPxzz = mfbca + mfbac;
-					LBMReal mxyyMxzz = mfbca - mfbac;
+					real mxyyPxzz = mfbca + mfbac;
+					real mxyyMxzz = mfbca - mfbac;
 
 					//relax
 					wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -879,12 +890,12 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
 
 					//4.
-					CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-					CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-					CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-					CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-					CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-					CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+					CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+					CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+					CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+					CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+					CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+					CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 					//5.
 					CUMbcc += O5 * (-CUMbcc);
@@ -904,9 +915,9 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 					mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 					//5.
 					mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -936,9 +947,9 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					mfaba = -mfaba;
 					mfaab = -mfaab;
 					//////////////////////////////////////////////////////////////////////////////////////
-					mfbaa += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
-					mfaba += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
-					mfaab += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dyuy * dX3_phi) / (rho);
+					mfbaa += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
+					mfaba += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
+					mfaab += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dzuz * dX3_phi) / (rho); // z row: diagonal term is dzuz (was dyuy, copy-paste slip from the y row)
 					////////////////////////////////////////////////////////////////////////////////////
 					//back
 					////////////////////////////////////////////////////////////////////////////////////
@@ -1152,7 +1163,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					//proof correctness
 					//////////////////////////////////////////////////////////////////////////
 					//#ifdef  PROOF_CORRECTNESS
-					LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					//			   //LBMReal dif = fabs(drho - rho_post);
@@ -1211,7 +1222,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 /////////////////////  P H A S E - F I E L D   S O L V E R
 ////////////////////////////////////////////
 /////CUMULANT PHASE-FIELD
-					LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+					real omegaD =1.0/( 3.0 * mob + 0.5);
 					{
 						mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 						mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -1248,31 +1259,31 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// second component
-						LBMReal concentration =
+						real concentration =
 							((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 								(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 								((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal oneMinusRho = c1- concentration;
+						real oneMinusRho = c1o1 - concentration;
 
-						LBMReal cx =
+						real cx =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 								(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 								(mfcbb - mfabb));
-						LBMReal cy =
+						real cy =
 							((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 								(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 								(mfbcb - mfbab));
-						LBMReal cz =
+						real cz =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 								(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 								(mfbbc - mfbba));
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// calculate the square of velocities for this lattice node
-						LBMReal cx2 = cx * cx;
-						LBMReal cy2 = cy * cy;
-						LBMReal cz2 = cz * cz;
+						real cx2 = cx * cx;
+						real cy2 = cy * cy;
+						real cz2 = cz * cz;
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 						//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -1281,85 +1292,85 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho); // Kinverse must be the reciprocal of K (was c3o1 with c1o9); now matches the backward X-Dir call
 
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - experimental Cumulant ... to be published ... hopefully
 						//!
 
 						// linearized orthogonalization of 3rd order central moments
-						LBMReal Mabc = mfabc - mfaba * c1o3;
-						LBMReal Mbca = mfbca - mfbaa * c1o3;
-						LBMReal Macb = mfacb - mfaab * c1o3;
-						LBMReal Mcba = mfcba - mfaba * c1o3;
-						LBMReal Mcab = mfcab - mfaab * c1o3;
-						LBMReal Mbac = mfbac - mfbaa * c1o3;
+						real Mabc = mfabc - mfaba * c1o3;
+						real Mbca = mfbca - mfbaa * c1o3;
+						real Macb = mfacb - mfaab * c1o3;
+						real Mcba = mfcba - mfaba * c1o3;
+						real Mcab = mfcab - mfaab * c1o3;
+						real Mbac = mfbac - mfbaa * c1o3;
 						// linearized orthogonalization of 5th order central moments
-						LBMReal Mcbc = mfcbc - mfaba * c1o9;
-						LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-						LBMReal Mccb = mfccb - mfaab * c1o9;
+						real Mcbc = mfcbc - mfaba * c1o9;
+						real Mbcc = mfbcc - mfbaa * c1o9;
+						real Mccb = mfccb - mfaab * c1o9;
 
 						// collision of 1st order moments
-						cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-							normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-							normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-							normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+							normX1 * (c1o1 - c1o2 * omegaD) * (c1o1 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale; // typed constants (c1o2, c1o1) instead of double literals keep the expression in 'real'
+						cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+							normX2 * (c1o1 - c1o2 * omegaD) * (c1o1 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+							normX3 * (c1o1 - c1o2 * omegaD) * (c1o1 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 						cx2 = cx * cx;
 						cy2 = cy * cy;
 						cz2 = cz * cz;
 
 						// equilibration of 2nd order moments
-						mfbba = zeroReal;
-						mfbab = zeroReal;
-						mfabb = zeroReal;
+						mfbba = c0o1;
+						mfbab = c0o1;
+						mfabb = c0o1;
 
 						mfcaa = c1o3 * concentration;
 						mfaca = c1o3 * concentration;
 						mfaac = c1o3 * concentration;
 
 						// equilibration of 3rd order moments
-						Mabc = zeroReal;
-						Mbca = zeroReal;
-						Macb = zeroReal;
-						Mcba = zeroReal;
-						Mcab = zeroReal;
-						Mbac = zeroReal;
-						mfbbb = zeroReal;
+						Mabc = c0o1;
+						Mbca = c0o1;
+						Macb = c0o1;
+						Mcba = c0o1;
+						Mcab = c0o1;
+						Mbac = c0o1;
+						mfbbb = c0o1;
 
 						// from linearized orthogonalization 3rd order central moments to central moments
 						mfabc = Mabc + mfaba * c1o3;
@@ -1374,14 +1385,14 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						mfcac = c1o9 * concentration;
 						mfcca = c1o9 * concentration;
 
-						mfcbb = zeroReal;
-						mfbcb = zeroReal;
-						mfbbc = zeroReal;
+						mfcbb = c0o1;
+						mfbcb = c0o1;
+						mfbbc = c0o1;
 
 						// equilibration of 5th order moments
-						Mcbc = zeroReal;
-						Mbcc = zeroReal;
-						Mccb = zeroReal;
+						Mcbc = c0o1;
+						Mbcc = c0o1;
+						Mccb = c0o1;
 
 						// from linearized orthogonalization 5th order central moments to central moments
 						mfcbc = Mcbc + mfaba * c1o9;
@@ -1399,39 +1410,39 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -1472,58 +1483,72 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * the WEIGTH-weighted sum of phi differences across the first direction index (P.. minus M..) over the PPP-type, PP0-type and axis neighbour pairs.
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * the WEIGTH-weighted sum of phi differences across the second direction index (.P. minus .M.) over the PPP-type, PP0-type and axis neighbour pairs.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * the WEIGTH-weighted sum of phi differences across the third direction index (..P minus ..M) over the PPP-type, PP0-type and axis neighbour pairs.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi2()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Same stencil as gradX1_phi() but reading phi2: 3.0 * the WEIGTH-weighted sum of phi2 differences across the first direction index (P.. minus M..).
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi2()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Same stencil as gradX2_phi() but reading phi2: 3.0 * the WEIGTH-weighted sum of phi2 differences across the second direction index (.P. minus .M.).
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi2()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Same stencil as gradX3_phi() but reading phi2: 3.0 * the WEIGTH-weighted sum of phi2 differences across the third direction index (..P minus ..M).
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::nabla2_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::nabla2_phi()
 {
 	using namespace D3Q27System;
-	LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+	real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -1543,6 +1568,8 @@ LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::nabla2_phi()
 void MultiphasePressureFilterCompressibleAirLBMKernel::computePhasefield()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
 	int minX1 = ghostLayerWidth;
@@ -1597,10 +1624,11 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::computePhasefield()
 	}
 }
 
-void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -1617,10 +1645,11 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors(CbArray3D<L
 	}
 }
 
-void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h
index e15f29e0434c0d5f59977226cab91455f2a39f70..65be707f90d1327cad559cc7f9361e74508bcd30 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h
@@ -57,60 +57,60 @@ public:
     //CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
 
 
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; }
 protected:
     virtual void initDataSet();
     void swapDistributions() override;
 
     void initForcing();
 
-    void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+    void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-    LBMReal f1[D3Q27System::ENDF+1];
+    real f1[D3Q27System::ENDF+1];
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
 
-    LBMReal h  [D3Q27System::ENDF+1];
-    LBMReal h2[D3Q27System::ENDF + 1];
-    LBMReal g  [D3Q27System::ENDF+1];
-    LBMReal phi[D3Q27System::ENDF+1];
-    LBMReal phi2[D3Q27System::ENDF + 1];
-    LBMReal pr1[D3Q27System::ENDF+1];
-    LBMReal phi_cutoff[D3Q27System::ENDF+1];
+    real h  [D3Q27System::ENDF+1];
+    real h2[D3Q27System::ENDF + 1];
+    real g  [D3Q27System::ENDF+1];
+    real phi[D3Q27System::ENDF+1];
+    real phi2[D3Q27System::ENDF + 1];
+    real pr1[D3Q27System::ENDF+1];
+    real phi_cutoff[D3Q27System::ENDF+1];
 
-    LBMReal gradX1_phi();
-    LBMReal gradX2_phi();
-    LBMReal gradX3_phi();
-    LBMReal gradX1_phi2();
-    LBMReal gradX2_phi2();
-    LBMReal gradX3_phi2();
+    real gradX1_phi();
+    real gradX2_phi();
+    real gradX3_phi();
+    real gradX1_phi2();
+    real gradX2_phi2();
+    real gradX3_phi2();
     void computePhasefield();
-    void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-    void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+    void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+    void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
 
-    LBMReal nabla2_phi();
+    real nabla2_phi();
 
     mu::value_type muX1,muX2,muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
     mu::value_type muRho;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp
index e1d24a2272f0846a29045bd9438db6b0dc729d36..0aafeb0b03afcfb0fe10196bc8c149a0979bdfc2 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp
@@ -39,6 +39,7 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include <cmath>
+#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
@@ -51,16 +52,16 @@ void MultiphasePressureFilterLBMKernel::initDataSet()
 	SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector( nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0)); // For phase-field
 
 	//SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	dataSet->setFdistributions(f);
 	dataSet->setHdistributions(h); // For phase-field
 	//dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 }
 //////////////////////////////////////////////////////////////////////////
 SPtr<LBMKernel> MultiphasePressureFilterLBMKernel::clone()
@@ -92,23 +93,26 @@ SPtr<LBMKernel> MultiphasePressureFilterLBMKernel::clone()
 	return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+void  MultiphasePressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+	// In-place forward step on (mfa, mfb, mfc): mfa <- mfa+mfb+mfc; mfb and mfc are rebuilt from (mfc-mfa) and (mfa+mfc) using vv, v2, the Kinverse/K scaling and the oneMinusRho offset. Constants come from vf::lbm::constant (formerly UbMath).
+	using namespace vf::lbm::constant;
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+void  MultiphasePressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -116,20 +120,24 @@ void  MultiphasePressureFilterLBMKernel::backwardInverseChimeraWithKincompressib
 
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+void  MultiphasePressureFilterLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+	// In-place forward step on (mfa, mfb, mfc): mfa <- mfa+mfb+mfc, mfb <- (mfc-mfa) - vv*sum, mfc <- (mfc+mfa) + v2*sum - 2*vv*(mfc-mfa). c2o1 comes from vf::lbm::constant (formerly UbMath).
+	using namespace vf::lbm::constant;
+
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
-void  MultiphasePressureFilterLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+void  MultiphasePressureFilterLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -139,13 +147,15 @@ void  MultiphasePressureFilterLBMKernel::backwardChimera(LBMReal& mfa, LBMReal&
 void MultiphasePressureFilterLBMKernel::calculate(int step)
 {
 	using namespace D3Q27System;
-	using namespace UbMath;
+//	using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::lbm::constant;
 
 	forcingX1 = 0.0;
 	forcingX2 = 0.0;
 	forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
 														 /////////////////////////////////////
 
 	localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -156,7 +166,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 	nonLocalDistributionsH1 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getNonLocalDistributions();
 	zeroDistributionsH1     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getZeroDistributions();
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -179,34 +189,34 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					int x2p = x2 + 1;
 					int x3p = x3 + 1;
 
-					LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 					(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
 						(((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
 							((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -243,16 +253,16 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-					LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
+					real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
 
 					(*pressureOld)(x1, x2, x3) = (*pressure)(x1, x2, x3) + rho * c1o3 * drho;
 				}
@@ -260,7 +270,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 		}
 	}
 
-	LBMReal collFactorM;
+	real collFactorM;
 
 	////Periodic Filter
 	for (int x3 = minX3-1; x3 <= maxX3; x3++) {
@@ -268,7 +278,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 			for (int x1 = minX1-1; x1 <= maxX1; x1++) {
 				if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-					LBMReal sum = 0.;
+					real sum = 0.;
 
 					///Version for boundaries
 					for (int xx = -1; xx <= 1; xx++) {
@@ -284,9 +294,9 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 								int zzz = zz + x3;
 
 								if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-									sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
+									sum+= 64.0/(216.0*(c1o1+c3o1 *abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
 								}
-								else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressureOld)(x1, x2, x3);
+								else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressureOld)(x1, x2, x3);
 								}
 
 
@@ -332,48 +342,48 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					findNeighbors(phaseField, x1, x2, x3);
 
-					LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
-
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-					LBMReal dX1_phi = gradX1_phi();
-					LBMReal dX2_phi = gradX2_phi();
-					LBMReal dX3_phi = gradX3_phi();
-
-					LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-					LBMReal normX1 = dX1_phi / denom;
-					LBMReal normX2 = dX2_phi / denom;
-					LBMReal normX3 = dX3_phi / denom;
+					real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
+
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+					real dX1_phi = gradX1_phi();
+					real dX2_phi = gradX2_phi();
+					real dX3_phi = gradX3_phi();
+
+					real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+					real normX1 = dX1_phi / denom;
+					real normX2 = dX2_phi / denom;
+					real normX3 = dX3_phi / denom;
 
 					dX1_phi = normX1 * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale;
                     dX2_phi = normX2 * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale;
@@ -382,77 +392,77 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-					LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+					real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
 					//----------- Calculating Macroscopic Values -------------
-					LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+					real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
-					LBMReal m0, m1, m2;
-					LBMReal rhoRef=c1;
+					real m0, m1, m2;
+					real rhoRef=c1o1;
 
-					LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+					real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 						(mfcbb - mfabb))/rhoRef;
-					LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+					real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 						(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 						(mfbcb - mfbab))/rhoRef;
-					LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+					real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 						(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 						(mfbbc - mfbba))/rhoRef;
 
-					LBMReal gradPx = 0.0;
-					LBMReal gradPy = 0.0;
-					LBMReal gradPz = 0.0;
+					real gradPx = 0.0;
+					real gradPy = 0.0;
+					real gradPz = 0.0;
 					for (int dir1 = -1; dir1 <= 1; dir1++) {
 						for (int dir2 = -1; dir2 <= 1; dir2++) {
 							int yyy = x2 + dir1;
 							int zzz = x3 + dir2;
 							if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 + 1, yyy, zzz)) {
-								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							int xxx = x1 + dir1;
 							if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2+1, zzz)) {
-								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							yyy = x2 + dir2;
 							if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 						}
 					}
 
 					//Viscosity increase by pressure gradient
-					LBMReal errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
+					real errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
 					//LBMReal limVis = 0.0000001*10;//0.01;
 					// collFactorM =collFactorM/(c1+limVis*(errPhi*errPhi)*collFactorM);
 					// collFactorM = (collFactorM < 1.8) ? 1.8 : collFactorM;
@@ -483,45 +493,45 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//vvz += mu * dX3_phi * c1o2 / rho;
 
 					//Abbas
-					LBMReal pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb) * c1o3;
 
-					LBMReal M200 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real M200 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba))))
 						+ ((mfabb + mfcbb))));
-					LBMReal M020 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real M020 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfbab + mfbcb))));
-					LBMReal M002 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real M002 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (+((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfbba + mfbbc))));
 
-					LBMReal M110 = ((((((mfaaa + mfccc) + (-mfcac - mfaca)) + ((mfaac + mfcca) + (-mfcaa - mfacc)))
+					real M110 = ((((((mfaaa + mfccc) + (-mfcac - mfaca)) + ((mfaac + mfcca) + (-mfcaa - mfacc)))
 						+ (((mfaab + mfccb) + (-mfacb - mfcab))))
 						));
-					LBMReal M101 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcac + mfaca) - (mfcaa + mfacc)))
+					real M101 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcac + mfaca) - (mfcaa + mfacc)))
 						+ (((mfaba + mfcbc) + (-mfabc - mfcba))))
 						));
-					LBMReal M011 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcaa + mfacc) - (mfcac + mfaca)))
+					real M011 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcaa + mfacc) - (mfcac + mfaca)))
 						+ (((mfbaa + mfbcc) + (-mfbac - mfbca))))
 						));
-					LBMReal vvxI = vvx;
-					LBMReal vvyI = vvy;
-					LBMReal vvzI = vvz;
+					real vvxI = vvx;
+					real vvyI = vvy;
+					real vvzI = vvz;
 
 					//LBMReal collFactorStore = collFactorM;
 					//LBMReal stress;
 					for (int iter = 0; iter < 1; iter++) {
-						LBMReal OxxPyyPzz = 1.0;
-						LBMReal mxxPyyPzz = (M200 - vvxI * vvxI) + (M020 - vvyI * vvyI) + (M002 - vvzI * vvzI);
-						mxxPyyPzz -= c3 * pStar;
+						real OxxPyyPzz = 1.0;
+						real mxxPyyPzz = (M200 - vvxI * vvxI) + (M020 - vvyI * vvyI) + (M002 - vvzI * vvzI);
+						mxxPyyPzz -= c3o1 * pStar;
 
-						LBMReal mxxMyy = (M200 - vvxI * vvxI) - (M020 - vvyI * vvyI);
-						LBMReal mxxMzz = (M200 - vvxI * vvxI) - (M002 - vvzI * vvzI);
-						LBMReal mxy = M110 - vvxI * vvyI;
-						LBMReal mxz = M101 - vvxI * vvzI;
-						LBMReal myz = M011 - vvyI * vvzI;
+						real mxxMyy = (M200 - vvxI * vvxI) - (M020 - vvyI * vvyI);
+						real mxxMzz = (M200 - vvxI * vvxI) - (M002 - vvzI * vvzI);
+						real mxy = M110 - vvxI * vvyI;
+						real mxz = M101 - vvxI * vvzI;
+						real myz = M011 - vvyI * vvzI;
 
 						///////Bingham
 						//LBMReal dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
@@ -541,16 +551,16 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//////!Bingham
 
 
-						mxxMyy *= c1 - collFactorM * c1o2;
-						mxxMzz *= c1 - collFactorM * c1o2;
-						mxy *= c1 - collFactorM * c1o2;
-						mxz *= c1 - collFactorM * c1o2;
-						myz *= c1 - collFactorM * c1o2;
-						mxxPyyPzz *= c1 - OxxPyyPzz * c1o2;
+						mxxMyy *= c1o1 - collFactorM * c1o2;
+						mxxMzz *= c1o1 - collFactorM * c1o2;
+						mxy *= c1o1 - collFactorM * c1o2;
+						mxz *= c1o1 - collFactorM * c1o2;
+						myz *= c1o1 - collFactorM * c1o2;
+						mxxPyyPzz *= c1o1 - OxxPyyPzz * c1o2;
 						//mxxPyyPzz += c3o2 * pStar;
-						LBMReal mxx = (mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
-						LBMReal myy = (-c2 * mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
-						LBMReal mzz = (mxxMyy - c2 * mxxMzz + mxxPyyPzz) * c1o3;
+						real mxx = (mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
+						real myy = (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
+						real mzz = (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz) * c1o3;
 						vvxI = vvx - (mxx * dX1_phi + mxy * dX2_phi + mxz * dX3_phi) * rhoToPhi / (rho);
 						vvyI = vvy - (mxy * dX1_phi + myy * dX2_phi + myz * dX3_phi) * rhoToPhi / (rho);
 						vvzI = vvz - (mxz * dX1_phi + myz * dX2_phi + mzz * dX3_phi) * rhoToPhi / (rho);
@@ -560,9 +570,9 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					}
 
 
-					forcingX1 += c2 * (vvxI - vvx);
-					forcingX2 += c2 * (vvyI - vvy);
-					forcingX3 += c2 * (vvzI - vvz);
+					forcingX1 += c2o1 * (vvxI - vvx);
+					forcingX2 += c2o1 * (vvyI - vvy);
+					forcingX3 += c2o1 * (vvzI - vvz);
 
 					mfabb += c1o2 * (-forcingX1) * c2o9;
 					mfbab += c1o2 * (-forcingX2) * c2o9;
@@ -600,14 +610,14 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//!Abbas
 
 
-					LBMReal vx2;
-					LBMReal vy2;
-					LBMReal vz2;
+					real vx2;
+					real vy2;
+					real vz2;
 					vx2 = vvx * vvx;
 					vy2 = vvy * vvy;
 					vz2 = vvz * vvz;
 					///////////////////////////////////////////////////////////////////////////////////////////               
-					LBMReal oMdrho;
+					real oMdrho;
 
 
 					oMdrho = mfccc + mfaaa;
@@ -637,8 +647,8 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 															 ////////////////////////////////////////////////////////////////////////////////////
-					LBMReal wadjust;
-					LBMReal qudricLimit = 0.01;
+					real wadjust;
+					real qudricLimit = 0.01;
 					////////////////////////////////////////////////////////////////////////////////////
 					//Hin
 					////////////////////////////////////////////////////////////////////////////////////
@@ -871,21 +881,21 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					////////////////////////////////////////////////////////////////////////////////////
 
 					// mfaaa = 0.0;
-					LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+					real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 											//  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 											//  LBMReal OxyyMxzz  = 1.;//2+s9;//
-					LBMReal O4 = 1.;
-					LBMReal O5 = 1.;
-					LBMReal O6 = 1.;
+					real O4 = 1.;
+					real O5 = 1.;
+					real O6 = 1.;
 
 					/////fourth order parameters; here only for test. Move out of loop!
 
-					LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-					LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-					LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-					LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+					real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+					real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+					real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-					LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 					//Cum 4.
@@ -893,21 +903,21 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 					//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-					LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-					LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-					LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+					real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+					real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+					real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+					real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 					//Cum 5.
-					LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-					LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-					LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+					real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+					real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+					real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 					//Cum 6.
-					LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+					real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 						- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 						- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 						- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -921,21 +931,21 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					//2.
 					// linear combinations
-					LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+					real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 					//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 					mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 										//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-					LBMReal mxxMyy = mfcaa - mfaca;
-					LBMReal mxxMzz = mfcaa - mfaac;
+					real mxxMyy = mfcaa - mfaca;
+					real mxxMzz = mfcaa - mfaac;
 
-					LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-					LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-					LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+					real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+					real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+					real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-					LBMReal Dxy = -three * collFactorM * mfbba;
-					LBMReal Dxz = -three * collFactorM * mfbab;
-					LBMReal Dyz = -three * collFactorM * mfabb;
+					real Dxy = -c3o1 * collFactorM * mfbba;
+					real Dxz = -c3o1 * collFactorM * mfbab;
+					real Dyz = -c3o1 * collFactorM * mfabb;
 
 					//relax
 					mxxPyyPzz += OxxPyyPzz * (/*mfaaa*/ - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -962,14 +972,14 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					//3.
 					// linear combinations
-					LBMReal mxxyPyzz = mfcba + mfabc;
-					LBMReal mxxyMyzz = mfcba - mfabc;
+					real mxxyPyzz = mfcba + mfabc;
+					real mxxyMyzz = mfcba - mfabc;
 
-					LBMReal mxxzPyyz = mfcab + mfacb;
-					LBMReal mxxzMyyz = mfcab - mfacb;
+					real mxxzPyyz = mfcab + mfacb;
+					real mxxzMyyz = mfcab - mfacb;
 
-					LBMReal mxyyPxzz = mfbca + mfbac;
-					LBMReal mxyyMxzz = mfbca - mfbac;
+					real mxyyPxzz = mfbca + mfbac;
+					real mxyyMxzz = mfbca - mfbac;
 
 					//relax
 					wadjust = Oxyz + (1. - Oxyz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -996,12 +1006,12 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
 
 					//4.
-					CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-					CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-					CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-					CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-					CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-					CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+					CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+					CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+					CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+					CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+					CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+					CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 					//5.
 					CUMbcc += O5 * (-CUMbcc);
@@ -1021,9 +1031,9 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 					mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 					//5.
 					mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -1301,7 +1311,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//proof correctness
 					//////////////////////////////////////////////////////////////////////////
 					//#ifdef  PROOF_CORRECTNESS
-					LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					//			   //LBMReal dif = fabs(drho - rho_post);
@@ -1360,7 +1370,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 /////////////////////  P H A S E - F I E L D   S O L V E R
 ////////////////////////////////////////////
 /////CUMULANT PHASE-FIELD
-					LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+					real omegaD =1.0/( 3.0 * mob + 0.5);
 					{
 						mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 						mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -1397,31 +1407,31 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// second component
-						LBMReal concentration =
+						real concentration =
 							((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 								(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 								((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal oneMinusRho = c1- concentration;
+						real oneMinusRho = c1o1 - concentration;
 
-						LBMReal cx =
+						real cx =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 								(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 								(mfcbb - mfabb));
-						LBMReal cy =
+						real cy =
 							((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 								(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 								(mfbcb - mfbab));
-						LBMReal cz =
+						real cz =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 								(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 								(mfbbc - mfbba));
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// calculate the square of velocities for this lattice node
-						LBMReal cx2 = cx * cx;
-						LBMReal cy2 = cy * cy;
-						LBMReal cz2 = cz * cz;
+						real cx2 = cx * cx;
+						real cy2 = cy * cy;
+						real cz2 = cz * cz;
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 						//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -1430,85 +1440,85 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - experimental Cumulant ... to be published ... hopefully
 						//!
 
 						// linearized orthogonalization of 3rd order central moments
-						LBMReal Mabc = mfabc - mfaba * c1o3;
-						LBMReal Mbca = mfbca - mfbaa * c1o3;
-						LBMReal Macb = mfacb - mfaab * c1o3;
-						LBMReal Mcba = mfcba - mfaba * c1o3;
-						LBMReal Mcab = mfcab - mfaab * c1o3;
-						LBMReal Mbac = mfbac - mfbaa * c1o3;
+						real Mabc = mfabc - mfaba * c1o3;
+						real Mbca = mfbca - mfbaa * c1o3;
+						real Macb = mfacb - mfaab * c1o3;
+						real Mcba = mfcba - mfaba * c1o3;
+						real Mcab = mfcab - mfaab * c1o3;
+						real Mbac = mfbac - mfbaa * c1o3;
 						// linearized orthogonalization of 5th order central moments
-						LBMReal Mcbc = mfcbc - mfaba * c1o9;
-						LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-						LBMReal Mccb = mfccb - mfaab * c1o9;
+						real Mcbc = mfcbc - mfaba * c1o9;
+						real Mbcc = mfbcc - mfbaa * c1o9;
+						real Mccb = mfccb - mfaab * c1o9;
 
 						// collision of 1st order moments
-						cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-							normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-							normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-							normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+							normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+							normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+							normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 						cx2 = cx * cx;
 						cy2 = cy * cy;
 						cz2 = cz * cz;
 
 						// equilibration of 2nd order moments
-						mfbba = zeroReal;
-						mfbab = zeroReal;
-						mfabb = zeroReal;
+						mfbba = c0o1;
+						mfbab = c0o1;
+						mfabb = c0o1;
 
 						mfcaa = c1o3 * concentration;
 						mfaca = c1o3 * concentration;
 						mfaac = c1o3 * concentration;
 
 						// equilibration of 3rd order moments
-						Mabc = zeroReal;
-						Mbca = zeroReal;
-						Macb = zeroReal;
-						Mcba = zeroReal;
-						Mcab = zeroReal;
-						Mbac = zeroReal;
-						mfbbb = zeroReal;
+						Mabc = c0o1;
+						Mbca = c0o1;
+						Macb = c0o1;
+						Mcba = c0o1;
+						Mcab = c0o1;
+						Mbac = c0o1;
+						mfbbb = c0o1;
 
 						// from linearized orthogonalization 3rd order central moments to central moments
 						mfabc = Mabc + mfaba * c1o3;
@@ -1523,14 +1533,14 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						mfcac = c1o9 * concentration;
 						mfcca = c1o9 * concentration;
 
-						mfcbb = zeroReal;
-						mfbcb = zeroReal;
-						mfbbc = zeroReal;
+						mfcbb = c0o1;
+						mfbcb = c0o1;
+						mfbbc = c0o1;
 
 						// equilibration of 5th order moments
-						Mcbc = zeroReal;
-						Mbcc = zeroReal;
-						Mccb = zeroReal;
+						Mcbc = c0o1;
+						Mbcc = c0o1;
+						Mccb = c0o1;
 
 						// from linearized orthogonalization 5th order central moments to central moments
 						mfcbc = Mcbc + mfaba * c1o9;
@@ -1548,39 +1558,39 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -1621,34 +1631,42 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphasePressureFilterLBMKernel::gradX1_phi()
+real MultiphasePressureFilterLBMKernel::gradX1_phi() // returns 3.0 times a weighted sum of phi differences between mirrored directions of the member array phi
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Each difference is phi at a direction whose first letter is P minus phi at its mirror (first letter M); directions with no, one and two '0' letters are scaled by WEIGTH[DIR_PPP], WEIGTH[DIR_PP0] and WEIGTH[DIR_0P0] respectively.
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
 }
 
-LBMReal MultiphasePressureFilterLBMKernel::gradX2_phi()
+real MultiphasePressureFilterLBMKernel::gradX2_phi() // returns 3.0 times a weighted sum of phi differences between mirrored directions of the member array phi
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Pairs written with the second-letter-M direction first are subtracted, so every net term is phi(second letter P) - phi(second letter M); the three WEIGTH entries scale the groups with no, one and two '0' letters.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
 }
 
-LBMReal MultiphasePressureFilterLBMKernel::gradX3_phi()
+real MultiphasePressureFilterLBMKernel::gradX3_phi() // returns 3.0 times a weighted sum of phi differences between mirrored directions of the member array phi
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Pairs written with the third-letter-M direction first are subtracted, so every net term is phi(third letter P) - phi(third letter M); the three WEIGTH entries scale the groups with no, one and two '0' letters.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
 }
 
-LBMReal MultiphasePressureFilterLBMKernel::nabla2_phi()
+real MultiphasePressureFilterLBMKernel::nabla2_phi()
 {
 	using namespace D3Q27System;
-	LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+	real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -1668,6 +1686,8 @@ LBMReal MultiphasePressureFilterLBMKernel::nabla2_phi()
 void MultiphasePressureFilterLBMKernel::computePhasefield()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
 	int minX1 = ghostLayerWidth;
@@ -1722,10 +1742,11 @@ void MultiphasePressureFilterLBMKernel::computePhasefield()
 	}
 }
 
-void MultiphasePressureFilterLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphasePressureFilterLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h
index 9b2b568b2854b3351361d8e9687fbbc6a0d7f284..d13a5aeffa95cc3ee4980edf5cc93650ecc617a3 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h
@@ -51,13 +51,13 @@ public:
     virtual ~MultiphasePressureFilterLBMKernel(void) = default;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; } // always returns 0.0; no timing value is reported here
 
-    void setPhaseFieldBC(LBMReal bc)
+    void setPhaseFieldBC(real bc) // stores bc in phaseFieldBC (per the member's own comment: 0.0 = light fluid on the wall, 1.0 = heavy fluid)
     {
         phaseFieldBC = bc;
     }
-    LBMReal getPhaseFieldBC()
+    real getPhaseFieldBC() // returns the stored phaseFieldBC value unchanged
     {
         return phaseFieldBC;
     }
@@ -68,44 +68,44 @@ protected:
 
     void initForcing();
 
-    void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+    void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
 
-    LBMReal h  [D3Q27System::ENDF+1];
-    LBMReal phi[D3Q27System::ENDF+1];
+    real h  [D3Q27System::ENDF+1];
+    real phi[D3Q27System::ENDF+1];
 
-    LBMReal gradX1_phi();
-    LBMReal gradX2_phi();
-    LBMReal gradX3_phi();
+    real gradX1_phi();
+    real gradX2_phi();
+    real gradX3_phi();
     void computePhasefield();
-    void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+    void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
 
-    LBMReal nabla2_phi();
+    real nabla2_phi();
 
     mu::value_type muX1,muX2,muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
     mu::value_type muRho;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 
-    LBMReal phaseFieldBC { 0.0 }; // if 0.0 then light fluid on the wall, else if 1.0 havy fluid
+    real phaseFieldBC { 0.0 }; // if 0.0 then light fluid on the wall, else if 1.0 havy fluid
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp
index 7424fdcbe8a36b7020e53fd78e154577fdc9ab47..f6cb731fb4eec7e49d7b946ada6fb1cf30456f2e 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp
@@ -81,23 +81,27 @@ SPtr<LBMKernel> MultiphaseScratchCumulantLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseScratchCumulantLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseScratchCumulantLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	// In-place transform of the triple (mfa, mfb, mfc): mfa receives the plain sum, while mfb and mfc are built from Kinverse-scaled sums (with oneMinusRho added to the scaled total), combined with vv / v2 and multiplied by K. Callers in this diff pass Kinverse and K as reciprocal constants.
+	real m2 = mfa + mfc; // sum of the two outer entries
+	real m1 = mfc - mfa; // difference of the two outer entries
+	real m0 = m2 + mfb; // sum of all three entries
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseScratchCumulantLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseScratchCumulantLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -105,20 +109,24 @@ SPtr<LBMKernel> MultiphaseScratchCumulantLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseScratchCumulantLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseScratchCumulantLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	// In-place transform of the triple (mfa, mfb, mfc) without K scaling: mfc must be written first because its right-hand side still reads the old mfa and mfc; mfb and mfa are then overwritten from the saved m1 / m2.
+	real m1 = (mfa + mfc) + mfb; // sum of all three entries
+	real m2 = mfc - mfa; // difference of the two outer entries
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseScratchCumulantLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseScratchCumulantLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+    using namespace vf::lbm::constant;
+
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -128,13 +136,15 @@ SPtr<LBMKernel> MultiphaseScratchCumulantLBMKernel::clone()
 void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::lbm::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = 1.0;// 1.0 / 3.0;
+	real oneOverInterfaceScale = 1.0;// 1.0 / 3.0;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -158,10 +168,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 
 		/////For velocity filter
@@ -182,34 +192,34 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -222,43 +232,43 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 						///Velocity filter
 
 
-						LBMReal rhoH = 1.0;
-						LBMReal rhoL = 1.0 / densityRatio;
-
-						LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-
-						LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
-
-						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3;
-						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3;
-						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3;
-						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3;
-						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3;
-						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3;
-						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3;
-						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3;
-						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3;
-						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3;
-						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3;
-						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3;
-						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3;
-
-						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3;
-						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3;
-						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3;
-						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3;
-						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3;
-						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3;
-						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3;
-						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3;
-						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3;
-						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3;
-						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3;
-						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3;
-						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3;
-
-						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3;
+						real rhoH = 1.0;
+						real rhoL = 1.0 / densityRatio;
+
+						real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+
+						real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
+
+						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3o1;
+						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3o1;
+						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3o1;
+						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3o1;
+						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3o1;
+						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3o1;
+						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3o1;
+						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3o1;
+						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3o1;
+						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3o1;
+						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3o1;
+						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3o1;
+						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3o1;
+
+						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3o1;
+						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3o1;
+						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3o1;
+						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3o1;
+						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3o1;
+						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3o1;
+						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3o1;
+						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3o1;
+						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3o1;
+						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3o1;
+						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3o1;
+						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3o1;
+						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3o1;
+
+						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3o1;
 
 						//(*velocityX)(x1, x2, x3) = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						//	(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
@@ -279,7 +289,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
         for (int x3 = minX3; x3 < maxX3; x3++) {
@@ -317,43 +327,43 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
                         findNeighbors(phaseField, x1, x2, x3);
 						//// reading distributions here appears to be unnecessary!
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
 
 						//LBMReal dX1_phi = 3.0*((
 						//	WEIGTH[TNE]*((((*phaseField)(x1 + 1, x2+1, x3+1)- (*phaseField)(x1 - 1, x2 - 1, x3 - 1))+ ((*phaseField)(x1 + 1, x2 - 1, x3 + 1) - (*phaseField)(x1 - 1, x2 + 1, x3 - 1)))
@@ -412,10 +422,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 						//dX3_phi = (2*dX3_phi -1*dX3_phi2);// 2 * dX3_phi - dX3_phi2;
 
 
-                        LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-                        LBMReal normX1 = dX1_phi/denom;
-						LBMReal normX2 = dX2_phi/denom;
-						LBMReal normX3 = dX3_phi/denom; 
+                        real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+                        real normX1 = dX1_phi/denom;
+						real normX2 = dX2_phi/denom;
+						real normX3 = dX3_phi/denom; 
 
 
 						///test for magnitude of gradient from phase indicator directly
@@ -434,10 +444,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 						//collFactorM = phi[REST] - phiL < (phiH - phiL) * 0.05 ? collFactorG : collFactorL;
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
 						if (withForcing) {
 							// muX1 = static_cast<double>(x1-1+ix1*maxX1);
@@ -448,49 +458,49 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 							forcingX2 = muForcingX2.Eval();
 							forcingX3 = muForcingX3.Eval();
 
-							LBMReal rho_m = 1.0 / densityRatio;
+							real rho_m = 1.0 / densityRatio;
 							forcingX1 = forcingX1 * (rho - rho_m);
 							forcingX2 = forcingX2 * (rho - rho_m);
 							forcingX3 = forcingX3 * (rho - rho_m);
 						}
                             			   ////Incompressible Kernal
 
-			    mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3)/rho*c3;
-			    mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3;
-			    mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3;
-			    mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3;
-			    mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3;
-			    mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3;
-			    mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3;
-			    mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3;
-			    mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3;
-			    mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3;
-			    mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3;
-			    mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3;
-			    mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3;
+			    mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3)/rho*c3o1;
+			    mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3o1;
+			    mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3o1;
+			    mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3o1;
+			    mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3o1;
+			    mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3o1;
+			    mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3o1;
+			    mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3o1;
+			    mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3o1;
+			    mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3o1;
+			    mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3o1;
+			    mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3o1;
+			    mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3o1;
 
-			    mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3;
-			    mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3;
-			    mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3;
-			    mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3;
-			    mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3;
-			    mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3;
-			    mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3;
-			    mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3;
-			    mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3;
-			    mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3;
-			    mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3;
-			    mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3;
-			    mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3;
+			    mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3o1;
+			    mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3o1;
+			    mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3o1;
+			    mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3o1;
+			    mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3o1;
+			    mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3o1;
+			    mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3o1;
+			    mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3o1;
+			    mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3o1;
+			    mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3o1;
+			    mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3o1;
+			    mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3o1;
+			    mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3o1;
 
-			    mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3;
+			    mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3o1;
 
 
 
 
 
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal
 			  // FIXME: warning: unused variable 'drho'
@@ -498,13 +508,13 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 //				   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 //				   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -576,7 +586,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   ///----Classic source term 8.4.2021
 
-			   LBMReal vvxF, vvyF, vvzF;
+			   real vvxF, vvyF, vvzF;
 			   vvxF = vvx;//-2*c1o24 * lap_vx;// 
 			   vvyF = vvy;//-2*c1o24 * lap_vy;// 
 			   vvzF = vvz;//-2*c1o24 * lap_vz;// 
@@ -587,10 +597,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //if (vvxF != vvx) {
 				  // vvxF = vvxF;
 			   //}
-			   LBMReal weightGrad =  1.0-denom*denom/(denom*denom+0.0001*0.001);
-			   LBMReal dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
-			   LBMReal dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
-			   LBMReal dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
+			   real weightGrad =  1.0-denom*denom/(denom*denom+0.0001*0.001);
+			   real dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
+			   real dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
+			   real dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
 
 			   //dX1_phiF *= 1.2;
 			   //dX2_phiF *= 1.2;
@@ -603,19 +613,19 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 				  // dX2_phiF = gradPhi * normX2;
 				  // dX3_phiF = gradPhi * normX3;
 
-			   LBMReal ux2;
-			   LBMReal uy2;
-			   LBMReal uz2;
+			   real ux2;
+			   real uy2;
+			   real uz2;
 			   ux2 = vvxF * vvxF;
 			   uy2 = vvyF * vvyF;
 			   uz2 = vvzF * vvzF;
-			   LBMReal forcingTerm[D3Q27System::ENDF + 1];
+			   real forcingTerm[D3Q27System::ENDF + 1];
 			   for (int dir = FSTARTDIR; dir <= FENDDIR; dir++) {
-				   LBMReal velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
-				   LBMReal velSq1 = velProd * velProd;
-				   LBMReal gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
+				   real velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
+				   real velSq1 = velProd * velProd;
+				   real gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
 
-				   LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
+				   real fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
 
 				   forcingTerm[dir] = 
 					   (-vvxF) * (fac1 * dX1_phiF ) +
@@ -646,8 +656,8 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   }
 
-			   LBMReal gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
-			   LBMReal fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
+			   real gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
+			   real fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
 			   forcingTerm[DIR_000] = (-vvxF) * (fac1 * dX1_phiF ) +
 				   (-vvyF) * (fac1 * dX2_phiF ) +
 				   (-vvzF) * (fac1 * dX3_phiF );
@@ -784,9 +794,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   ///////////////////////////////////////////////////////////////////////////////////////////
 			   if (withForcing)
 			   {
-				   muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-				   muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-				   muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+				   muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+				   muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+				   muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 				   forcingX1 = muForcingX1.Eval();
 				   forcingX2 = muForcingX2.Eval();
@@ -797,9 +807,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 				   vvz += forcingX3 * deltaT * 0.5; // Z
 			   }
 
-			   LBMReal vx2;
-			   LBMReal vy2;
-			   LBMReal vz2;
+			   real vx2;
+			   real vy2;
+			   real vz2;
 			   vx2 = vvx * vvx;
 			   vy2 = vvy * vvy;
 			   vz2 = vvz * vvz;
@@ -809,7 +819,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -840,8 +850,8 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1072,22 +1082,22 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Cumulants
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			   //LBMReal OxyyPxzz = 2.0 - collFactorM;// 1.;//-s9;//2+s9;//
 			   //LBMReal OxyyMxzz  = 2.0 - collFactorM;// 1.;//2+s9;//
-			   LBMReal O4 = 1.0;//collFactorM;// 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.0;//collFactorM;// 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz =  8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz =  8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-			   LBMReal Oxyz =  24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A =  (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real OxyyPxzz =  8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz =  8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			   real Oxyz =  24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+			   real A =  (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB =   (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB =   (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -1095,21 +1105,21 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1123,10 +1133,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 				mxxPyyPzz-=mfaaa;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -1152,17 +1162,17 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //mfbba += c1o6 * (dX1_phi * vvyF + dX2_phi * vvxF) * correctionScaling;
 
 
-			   LBMReal dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 			   ////relax unfiltered
 			   //! divergenceFilter 10.05.2021
-			   LBMReal divMag= (1.0 - phi[DIR_000]) * (phi[DIR_000])*10*5*sqrt(fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))));
+			   real divMag= (1.0 - phi[DIR_000]) * (phi[DIR_000])*10*5*sqrt(fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))));
 			  // LBMReal divMag = 500 *500* 50*(fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))))* (fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))));
 			   //LBMReal divMag = (dX1_phi * dxux) > 0 ? (dX1_phi * dxux) : 0;
 			   //divMag += (dX2_phi * dyuy) > 0 ? (dX2_phi * dyuy) : 0;
@@ -1237,14 +1247,14 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = Oxyz + (1. - Oxyz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1280,12 +1290,12 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //CUMcbb += O4 * (-CUMcbb);
 
 
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 
 
@@ -1349,9 +1359,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2513,7 +2523,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 
 			   mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
@@ -2577,7 +2587,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2592,26 +2602,26 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2620,60 +2630,60 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
-			   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration + normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-			   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration + normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-			   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration + normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+			   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration + normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+			   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration + normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+			   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration + normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
 			   //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2685,9 +2695,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2704,13 +2714,13 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2725,14 +2735,14 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2750,39 +2760,39 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2918,9 +2928,11 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseScratchCumulantLBMKernel::gradX1_phi()
+real MultiphaseScratchCumulantLBMKernel::gradX1_phi()
 {
+	using namespace vf::lbm::dir;
     using namespace D3Q27System;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -2931,9 +2943,11 @@ LBMReal MultiphaseScratchCumulantLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseScratchCumulantLBMKernel::gradX2_phi()
+real MultiphaseScratchCumulantLBMKernel::gradX2_phi()
 {
+	using namespace vf::lbm::dir;
     using namespace D3Q27System;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -2944,9 +2958,11 @@ LBMReal MultiphaseScratchCumulantLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseScratchCumulantLBMKernel::gradX3_phi()
+real MultiphaseScratchCumulantLBMKernel::gradX3_phi()
 {
-    using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	using namespace D3Q27System;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -2957,10 +2973,12 @@ LBMReal MultiphaseScratchCumulantLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseScratchCumulantLBMKernel::nabla2_phi()
+real MultiphaseScratchCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -2982,6 +3000,8 @@ LBMReal MultiphaseScratchCumulantLBMKernel::nabla2_phi()
 void MultiphaseScratchCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3036,10 +3056,11 @@ void MultiphaseScratchCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseScratchCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseScratchCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h
index c4cc5c263d079e402987dc38037c1dcf9e6ae6b1..6f10c9b5db0e2272d734de02a55dab1452f79d1a 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h
@@ -51,54 +51,54 @@ public:
    virtual ~MultiphaseScratchCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; } // always returns zero. NOTE(review): return type changed from double to real; confirm the overridden base declaration changed too
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
+   real h  [D3Q27System::ENDF+1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
 
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp
index 179189f8665bc0361dd271b83b87e146971f654b..8ea6c1f786e700fafaa8cb8d4fe900618852a192 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp
@@ -41,6 +41,8 @@
 #include <cmath>
 #include <iostream>
 #include <string>
+#include "lbm/constants/NumericConstants.h"
+//#include <basics/utilities/UbMath.h>
 
 #define PROOF_CORRECTNESS
 
@@ -53,19 +55,19 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::initDataSet()
 	SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector( nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9)); // For phase-field
 	SPtr<DistributionArray3D> h2(new D3Q27EsoTwist3DSplittedVector(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9));
 	SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	p1Old = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	p1Old = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	dataSet->setFdistributions(f);
 	dataSet->setHdistributions(h); // For phase-field
 	dataSet->setH2distributions(h2);
 	dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
-	phaseFieldOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 999.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	phaseFieldOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 999.0));
 
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 }
 //////////////////////////////////////////////////////////////////////////
 SPtr<LBMKernel> MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::clone()
@@ -96,23 +98,23 @@ SPtr<LBMKernel> MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::clone()
 	return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) { // overwrites mfa, mfb, mfc in place; all other arguments are passed by value
+	using namespace vf::lbm::constant; // for the constant c2o1 used below (replaces UbMath's c2)
+	real m2 = mfa + mfc; // sum of the two outer inputs
+	real m1 = mfc - mfa; // difference of the two outer inputs
+	real m0 = m2 + mfb; // sum of all three inputs
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; // m0 here is already scaled by Kinverse and offset by oneMinusRho; mfa keeps the unscaled sum
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+	using namespace vf::lbm::constant;
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -120,20 +122,20 @@ void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardInverseChim
 
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) { // overwrites mfa, mfb, mfc in place; vv and v2 are passed by value
+	using namespace vf::lbm::constant; // for the constant c2o1 used below (replaces UbMath's c2)
+	real m1 = (mfa + mfc) + mfb; // sum of all three inputs; written to mfa at the end
+	real m2 = mfc - mfa; // difference of the two outer inputs
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); // must run before mfa is overwritten: the right-hand side still reads the original mfa and mfc
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+	using namespace vf::lbm::constant;
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -143,13 +145,14 @@ void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardChimera(LBM
 void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 {
 	using namespace D3Q27System;
-	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	using namespace vf::lbm::dir;
 
 	forcingX1 = 0.0;
 	forcingX2 = 0.0;
 	forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
 														 /////////////////////////////////////
 
 	localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -165,7 +168,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 	zeroDistributionsH2     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getH2distributions())->getZeroDistributions();
 
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -179,7 +182,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 	int maxX1 = bcArrayMaxX1 - ghostLayerWidth;
 	int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
 	int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
-	LBMReal omegaDRho = 1.0;// 1.25;// 1.3;
+	real omegaDRho = 1.0;// 1.25;// 1.3;
 
 	for (int x3 = minX3-ghostLayerWidth; x3 < maxX3+ghostLayerWidth; x3++) {
 		for (int x2 = minX2-ghostLayerWidth; x2 < maxX2+ghostLayerWidth; x2++) {
@@ -189,40 +192,40 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					int x2p = x2 + 1;
 					int x3p = x3 + 1;
 
-					LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 					(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
 						(((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
 							((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
 								(mfbab + mfbcb) + (mfbba + mfbbc)) + mfbbb;
 					if ((*phaseField)(x1, x2, x3) > 1 ) {
-						(*phaseField)(x1, x2, x3) = c1;
+						(*phaseField)(x1, x2, x3) = c1o1;
 					}
 
 					if ((*phaseField)(x1, x2, x3) < 0) {
@@ -262,18 +265,18 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal rhoH = 1.0;
 					//LBMReal rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoH = 1.0*densityRatio;
-					LBMReal rhoL = 1.0;
+					real rhoH = 1.0*densityRatio;
+					real rhoL = 1.0;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal drho = (((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc))   )
+					real drho = (((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc))   )
 						+ (((mfaab + mfccb) + (mfacb + mfcab) ) + ((mfaba + mfcbc) + (mfabc + mfcba) ) + ((mfbaa + mfbcc) + (mfbac + mfbca) )))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 				
 					omegaDRho = 2.0;// 1.5;
 					drho *= omegaDRho;
-					LBMReal keepDrho = drho;
+					real keepDrho = drho;
 					drho = ((*p1Old)(x1, x2, x3) + drho) * c1o2;
 				//	drho = ((*p1Old)(x1, x2, x3)*c2o3 + drho*c1o3) ;
 					(*p1Old)(x1, x2, x3) = keepDrho;
@@ -322,7 +325,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 		}
 	}
 
-	LBMReal collFactorM;
+	real collFactorM;
 
 	////Periodic Filter
 	//for (int x3 = minX3-1; x3 <= maxX3; x3++) {
@@ -394,92 +397,92 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					findNeighbors(phaseField, x1, x2, x3);
 
-					LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-
-					LBMReal mfhcbb = (*this->localDistributionsH2)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfhbcb = (*this->localDistributionsH2)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfhbbc = (*this->localDistributionsH2)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfhccb = (*this->localDistributionsH2)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfhacb = (*this->localDistributionsH2)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfhcbc = (*this->localDistributionsH2)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfhabc = (*this->localDistributionsH2)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfhbcc = (*this->localDistributionsH2)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfhbac = (*this->localDistributionsH2)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfhccc = (*this->localDistributionsH2)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfhacc = (*this->localDistributionsH2)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfhcac = (*this->localDistributionsH2)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfhaac = (*this->localDistributionsH2)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfhabb = (*this->nonLocalDistributionsH2)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfhbab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfhbba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfhaab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfhcab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfhaba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfhcba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfhbaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfhbca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfhaaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfhcaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfhaca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfhcca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfhbbb = (*this->zeroDistributionsH2)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+
+					real mfhcbb = (*this->localDistributionsH2)(D3Q27System::ET_E, x1, x2, x3);
+					real mfhbcb = (*this->localDistributionsH2)(D3Q27System::ET_N, x1, x2, x3);
+					real mfhbbc = (*this->localDistributionsH2)(D3Q27System::ET_T, x1, x2, x3);
+					real mfhccb = (*this->localDistributionsH2)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfhacb = (*this->localDistributionsH2)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfhcbc = (*this->localDistributionsH2)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfhabc = (*this->localDistributionsH2)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfhbcc = (*this->localDistributionsH2)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfhbac = (*this->localDistributionsH2)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfhccc = (*this->localDistributionsH2)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfhacc = (*this->localDistributionsH2)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfhcac = (*this->localDistributionsH2)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfhaac = (*this->localDistributionsH2)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfhabb = (*this->nonLocalDistributionsH2)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfhbab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfhbba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfhaab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfhcab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfhaba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfhcba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfhbaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfhbca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfhaaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfhcaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfhaca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfhcca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfhbbb = (*this->zeroDistributionsH2)(x1, x2, x3);
 
 					//LBMReal rhoH = 1.0;
 					//LBMReal rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0/ densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0/ densityRatio;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal dX1_phi = gradX1_phi();
-					LBMReal dX2_phi = gradX2_phi();
-					LBMReal dX3_phi = gradX3_phi();
+					real dX1_phi = gradX1_phi();
+					real dX2_phi = gradX2_phi();
+					real dX3_phi = gradX3_phi();
 
-					LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9+1e-3;
+					real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9+1e-3;
 					// 01.09.2022: unclear what value we have to add to the normal: lager values better cut of in gas phase?
-					LBMReal normX1 = dX1_phi / denom;
-					LBMReal normX2 = dX2_phi / denom;
-					LBMReal normX3 = dX3_phi / denom;
+					real normX1 = dX1_phi / denom;
+					real normX2 = dX2_phi / denom;
+					real normX3 = dX3_phi / denom;
 
 
 
 					collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-					LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+					real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
 					//----------- Calculating Macroscopic Values -------------
-					LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
+					real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
 
 																		///scaled phase field
 					//LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) * (*phaseField)(x1, x2, x3) / ((*phaseField)(x1, x2, x3) * (*phaseField)(x1, x2, x3) + (c1 - (*phaseField)(x1, x2, x3)) * (c1 - (*phaseField)(x1, x2, x3))) - phiH);
@@ -488,23 +491,23 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH)+(one-phi[DIR_000])* (*pressure)(x1, x2, x3)*three; //compressible
 					//LBMReal rho = rhoL + (rhoH - rhoL) * phi[DIR_000] + (one - phi[DIR_000]) * (*pressure)(x1, x2, x3) * three; //compressible
 
-					LBMReal m0, m1, m2;
-					LBMReal rhoRef=c1;
+					real m0, m1, m2;
+					real rhoRef= c1o1;
 
-					LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+					real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 						(mfcbb - mfabb))/rhoRef;
-					LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+					real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 						(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 						(mfbcb - mfbab))/rhoRef;
-					LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+					real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 						(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 						(mfbbc - mfbba))/rhoRef;
 					////Filter&Gradient merged
-					LBMReal pressureHere = (*pressureOld)(x1, x2, x3);
+					real pressureHere = (*pressureOld)(x1, x2, x3);
 					//LBMReal pressureHere = (*pressure)(x1, x2, x3);
 
-					LBMReal arrayP[3][3][3] = { {{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}},
+					real arrayP[3][3][3] = { {{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}},
 												{{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}},
 												{ {pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}} };
 					//LBMReal LaplaceP = 0.0;
@@ -686,7 +689,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					///////////////////////////////////////////////
 
-					LBMReal pStarStart = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real pStarStart = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb) * c1o3;
 
@@ -793,7 +796,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 
 					//Abbas
-					LBMReal pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb) * c1o3;
 					//22.09.22 not yet in balance, repaire here
@@ -1046,9 +1049,9 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal M011 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ( (mfcaa + mfacc)- (mfcac + mfaca)))
 					//	+ (((mfbaa + mfbcc) + (-mfbac - mfbca))))
 					//	));
-					LBMReal vvxI = vvx;
-					LBMReal vvyI = vvy;
-					LBMReal vvzI = vvz;
+					real vvxI = vvx;
+					real vvyI = vvy;
+					real vvzI = vvz;
 
 					//LBMReal collFactorStore=collFactorM;
 					//LBMReal stress;
@@ -1164,16 +1167,16 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					//!Abbas
 
-					LBMReal vx2;
-					LBMReal vy2;
-					LBMReal vz2;
+					real vx2;
+					real vy2;
+					real vz2;
 					vx2 = vvx * vvx;
 					vy2 = vvy * vvy;
 					vz2 = vvz * vvz;
 					//pStar =ppStar- (vx2 + vy2 + vz2)*pStar;
 				//	pStar = (pStar + ppStar)*c1o2;
 					///////////////////////////////////////////////////////////////////////////////////////////               
-					LBMReal oMdrho;
+					real oMdrho;
 					///////////////
 						//mfabb -= pStar * c2o9;
 						//mfbab -= pStar * c2o9;
@@ -1231,8 +1234,8 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 															 ////////////////////////////////////////////////////////////////////////////////////
-					LBMReal wadjust;
-					LBMReal qudricLimit = 0.01 / (c1 + 1.0e4 * phi[DIR_000] * (c1 - phi[DIR_000])); //LBMReal qudricLimit = 0.01;
+					real wadjust;
+					real qudricLimit = 0.01 / (c1o1 + 1.0e4 * phi[DIR_000] * (c1o1 - phi[DIR_000])); //real qudricLimit = 0.01;
 					////////////////////////////////////////////////////////////////////////////////////
 					//Hin
 					////////////////////////////////////////////////////////////////////////////////////
@@ -1465,24 +1468,24 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					////////////////////////////////////////////////////////////////////////////////////
 
 					// mfaaa = 0.0;
-					LBMReal OxxPyyPzz = 1.0; //omega2 or bulk viscosity
+					real OxxPyyPzz = 1.0; //omega2 or bulk viscosity
 											//  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 											//  LBMReal OxyyMxzz  = 1.;//2+s9;//
-					LBMReal O4 = 1.;
-					LBMReal O5 = 1.;
-					LBMReal O6 = 1.;
+					real O4 = 1.;
+					real O5 = 1.;
+					real O6 = 1.;
 
 					//collFactorM+= (1.7 - collFactorM) * fabs(mfaaa) / (fabs(mfaaa) + 0.001f);
 
 
 					/////fourth order parameters; here only for test. Move out of loop!
 
-					LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-					LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-				    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-					LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+					real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+				    real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+					real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::B' )
-					LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//LBMReal stress = 1.0;// stress / (stress + 1.0e-10);
 					//stress = 1.0;
 					//OxyyPxzz += stress*(1.0-OxyyPxzz);
@@ -1497,21 +1500,21 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 					//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-					LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-					LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-					LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+					real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+					real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+					real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+					real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 					//Cum 5.
-					LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-					LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-					LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+					real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+					real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+					real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 					//Cum 6.
-					LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+					real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 						- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 						- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 						- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1525,7 +1528,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					//2.
 					// linear combinations
-					LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+					real mxxPyyPzz = mfcaa + mfaca + mfaac;
 					//pStar = (mxxPyyPzz+vx2+vy2+vz2) * c1o3;//does not work
 					//pStar = (mxxPyyPzz) * c1o3;
 					//pStar = pStar + 1.5 * (mxxPyyPzz * c1o3 - pStar);
@@ -1533,25 +1536,25 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 					mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 										//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-					LBMReal mxxMyy = mfcaa - mfaca;
-					LBMReal mxxMzz = mfcaa - mfaac;
+					real mxxMyy = mfcaa - mfaca;
+					real mxxMzz = mfcaa - mfaac;
 
 					///
-					LBMReal mmfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-					LBMReal mmfaca = c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
-					LBMReal mmfaac = c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
-					LBMReal mmfabb = mfabb;
-					LBMReal mmfbab = mfbab;
-					LBMReal mmfbba = mfbba;
+					real mmfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+					real mmfaca = c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
+					real mmfaac = c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
+					real mmfabb = mfabb;
+					real mmfbab = mfbab;
+					real mmfbba = mfbba;
 					///
 
-					LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz)*0;
+					real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz)*0;
 					//LBMReal dxux = -c1o2 * (mxxMyy + mxxMzz) * collFactorM - mfaaa * c1o3* omegaDRho;
-					LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-					LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
-					LBMReal Dxy = -three * collFactorM * mfbba;
-					LBMReal Dxz = -three * collFactorM * mfbab;
-					LBMReal Dyz = -three * collFactorM * mfabb;
+					real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+					real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+					real Dxy = -c3o1 * collFactorM * mfbba;
+					real Dxz = -c3o1 * collFactorM * mfbab;
+					real Dyz = -c3o1 * collFactorM * mfabb;
 //					// attempt to improve implicit  stress computation by fixed iteration
 //					LBMReal dX2_rho = (rhoToPhi)*dX2_phi;
 //					LBMReal dX1_rho = (rhoToPhi)*dX1_phi;
@@ -1599,14 +1602,14 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					//3.
 					// linear combinations
-					LBMReal mxxyPyzz = mfcba + mfabc;
-					LBMReal mxxyMyzz = mfcba - mfabc;
+					real mxxyPyzz = mfcba + mfabc;
+					real mxxyMyzz = mfcba - mfabc;
 
-					LBMReal mxxzPyyz = mfcab + mfacb;
-					LBMReal mxxzMyyz = mfcab - mfacb;
+					real mxxzPyyz = mfcab + mfacb;
+					real mxxzMyyz = mfcab - mfacb;
 
-					LBMReal mxyyPxzz = mfbca + mfbac;
-					LBMReal mxyyMxzz = mfbca - mfbac;
+					real mxyyPxzz = mfbca + mfbac;
+					real mxyyMxzz = mfbca - mfbac;
 
 					 mmfcaa += c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz- mfaaa);
 					 mmfaca += c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz- mfaaa);
@@ -1640,12 +1643,12 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
 
 					//4.
-					CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-					CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-					CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-					CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-					CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-					CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+					CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+					CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+					CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+					CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+					CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+					CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 					//5.
 					CUMbcc += O5 * (-CUMbcc);
@@ -1665,9 +1668,9 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 					mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 					//5.
 					mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2123,7 +2126,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 
 
-					pStar += pStarStart*(omegaDRho-c1);
+					pStar += pStarStart*(omegaDRho- c1o1);
 
 					mfcbb -= c2o9*pStar;
 					mfbcb -= c2o9*pStar;
@@ -2151,7 +2154,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfcaa -= c1o72*pStar;
 					mfaca -= c1o72*pStar;
 					mfcca -= c1o72*pStar;
-					mfbbb -= c4*c2o9*pStar;
+					mfbbb -= c4o1*c2o9*pStar;
 
 					mfhbcb = rho*c2o9 * pStar;
 					mfhbbc = rho*c2o9 * pStar;
@@ -2179,7 +2182,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfhcaa = rho*c1o72 * pStar;
 					mfhaca = rho*c1o72 * pStar;
 					mfhcca = rho*c1o72 * pStar;
-					mfhbbb = rho*c4 * c2o9 * pStar;
+					mfhbbb = rho* c4o1 * c2o9 * pStar;
 
 					//mfStartbcb =  c2o9  * pStarStart;
 					//mfStartbbc =  c2o9  * pStarStart;
@@ -2442,7 +2445,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//proof correctness
 					//////////////////////////////////////////////////////////////////////////
 					//#ifdef  PROOF_CORRECTNESS
-					LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					//			   //LBMReal dif = fabs(drho - rho_post);
@@ -2532,7 +2535,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 /////////////////////  P H A S E - F I E L D   S O L V E R
 ////////////////////////////////////////////
 /////CUMULANT PHASE-FIELD
-					LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+					real omegaD =1.0/( 3.0 * mob + 0.5);
 					{
 						mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 						mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2569,31 +2572,31 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// second component
-						LBMReal concentration =
+						real concentration =
 							((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 								(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 								((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal oneMinusRho = c1- concentration;
+						real oneMinusRho = c1o1 - concentration;
 
-						LBMReal cx =
+						real cx =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 								(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 								(mfcbb - mfabb));
-						LBMReal cy =
+						real cy =
 							((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 								(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 								(mfbcb - mfbab));
-						LBMReal cz =
+						real cz =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 								(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 								(mfbbc - mfbba));
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// calculate the square of velocities for this lattice node
-						LBMReal cx2 = cx * cx;
-						LBMReal cy2 = cy * cy;
-						LBMReal cz2 = cz * cz;
+						real cx2 = cx * cx;
+						real cy2 = cy * cy;
+						real cz2 = cz * cz;
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 						//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2602,88 +2605,88 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho); // Kinverse/K must be reciprocal (9 and 1/9), matching the backward X-dir transform of this triple
 
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - experimental Cumulant ... to be published ... hopefully
 						//!
 
 						// linearized orthogonalization of 3rd order central moments
-						LBMReal Mabc = mfabc - mfaba * c1o3;
-						LBMReal Mbca = mfbca - mfbaa * c1o3;
-						LBMReal Macb = mfacb - mfaab * c1o3;
-						LBMReal Mcba = mfcba - mfaba * c1o3;
-						LBMReal Mcab = mfcab - mfaab * c1o3;
-						LBMReal Mbac = mfbac - mfbaa * c1o3;
+						real Mabc = mfabc - mfaba * c1o3;
+						real Mbca = mfbca - mfbaa * c1o3;
+						real Macb = mfacb - mfaab * c1o3;
+						real Mcba = mfcba - mfaba * c1o3;
+						real Mcab = mfcab - mfaab * c1o3;
+						real Mbac = mfbac - mfbaa * c1o3;
 						// linearized orthogonalization of 5th order central moments
-						LBMReal Mcbc = mfcbc - mfaba * c1o9;
-						LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-						LBMReal Mccb = mfccb - mfaab * c1o9;
+						real Mcbc = mfcbc - mfaba * c1o9;
+						real Mbcc = mfbcc - mfbaa * c1o9;
+						real Mccb = mfccb - mfaab * c1o9;
 
 						//31.05.2022 addaptive mobility
 						//omegaD = c1 + (sqrt((cx - vvx * concentration) * (cx - vvx * concentration) + (cy - vvy * concentration) * (cy - vvy * concentration) + (cz - vvz * concentration) * (cz - vvz * concentration))) / (sqrt((cx - vvx * concentration) * (cx - vvx * concentration) + (cy - vvy * concentration) * (cy - vvy * concentration) + (cz - vvz * concentration) * (cz - vvz * concentration)) + fabs((1.0 - concentration) * (concentration)) * c1o6 * oneOverInterfaceScale+1.0e-200);
 						//omegaD = c2 * (concentration * (concentration - c1)) / (-c6 * (sqrt((cx - vvx * concentration) * (cx - vvx * concentration) + (cy - vvy * concentration) * (cy - vvy * concentration) + (cz - vvz * concentration) * (cz - vvz * concentration))) + (concentration * (concentration - c1))+1.0e-200);
 						// collision of 1st order moments
-						cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-							normX1 * (c1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
-						cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-							normX2 * (c1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
-						cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-							normX3 * (c1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
+						cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+							normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
+						cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+							normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
+						cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+							normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
 
 						cx2 = cx * cx;
 						cy2 = cy * cy;
 						cz2 = cz * cz;
 
 						// equilibration of 2nd order moments
-						mfbba = zeroReal;
-						mfbab = zeroReal;
-						mfabb = zeroReal;
+						mfbba = c0o1;
+						mfbab = c0o1;
+						mfabb = c0o1;
 
 						mfcaa = c1o3 * concentration;
 						mfaca = c1o3 * concentration;
 						mfaac = c1o3 * concentration;
 
 						// equilibration of 3rd order moments
-						Mabc = zeroReal;
-						Mbca = zeroReal;
-						Macb = zeroReal;
-						Mcba = zeroReal;
-						Mcab = zeroReal;
-						Mbac = zeroReal;
-						mfbbb = zeroReal;
+						Mabc = c0o1;
+						Mbca = c0o1;
+						Macb = c0o1;
+						Mcba = c0o1;
+						Mcab = c0o1;
+						Mbac = c0o1;
+						mfbbb = c0o1;
 
 						// from linearized orthogonalization 3rd order central moments to central moments
 						mfabc = Mabc + mfaba * c1o3;
@@ -2698,14 +2701,14 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						mfcac = c1o9 * concentration;
 						mfcca = c1o9 * concentration;
 
-						mfcbb = zeroReal;
-						mfbcb = zeroReal;
-						mfbbc = zeroReal;
+						mfcbb = c0o1;
+						mfbcb = c0o1;
+						mfbbc = c0o1;
 
 						// equilibration of 5th order moments
-						Mcbc = zeroReal;
-						Mbcc = zeroReal;
-						Mccb = zeroReal;
+						Mcbc = c0o1;
+						Mbcc = c0o1;
+						Mccb = c0o1;
 
 						// from linearized orthogonalization 5th order central moments to central moments
 						mfcbc = Mcbc + mfaba * c1o9;
@@ -2723,39 +2726,39 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2800,82 +2803,102 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi()
 {
+	using namespace vf::lbm::dir;
 	using namespace D3Q27System;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_rhoInv(LBMReal rhoL,LBMReal rhoDIV)
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_rhoInv(real rhoL,real rhoDIV)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((1.0/(rhoL+rhoDIV*phi[DIR_PPP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_PMM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PMP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_PPM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMP]))))
 		+ WEIGTH[DIR_PP0] * (((1.0 / (rhoL + rhoDIV * phi[DIR_P0P]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0M])) + (1.0 / (rhoL + rhoDIV * phi[DIR_P0M]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0P]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PM0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MP0])) + (1.0 / (rhoL + rhoDIV * phi[DIR_PP0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MM0]))))) +
 		+WEIGTH[DIR_0P0] * (1.0 / (rhoL + rhoDIV * phi[DIR_P00]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M00])));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_rhoInv(LBMReal rhoL,LBMReal rhoDIV)
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_rhoInv(real rhoL,real rhoDIV)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((1.0 / (rhoL + rhoDIV * phi[DIR_PPP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMM])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PMM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PPM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMP])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PMP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPM]))))
 		+ WEIGTH[DIR_PP0] * (((1.0 / (rhoL + rhoDIV * phi[DIR_0PP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0MM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_0PM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0MP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PP0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MM0])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PM0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MP0]))))) +
 		+WEIGTH[DIR_0P0] * (1.0 / (rhoL + rhoDIV * phi[DIR_0P0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0M0])));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_rhoInv(LBMReal rhoL, LBMReal rhoDIV)
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_rhoInv(real rhoL, real rhoDIV)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((1.0 / (rhoL + rhoDIV * phi[DIR_PPP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMM])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PMM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PMP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPM])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PPM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMP]))))
 		+ WEIGTH[DIR_PP0] * (((1.0 / (rhoL + rhoDIV * phi[DIR_P0P]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0M])) - (1.0 / (rhoL + rhoDIV * phi[DIR_P0M]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0P]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_0MP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0PM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_0PP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0MM]))))) +
 		+WEIGTH[DIR_0P0] * (1.0 / (rhoL + rhoDIV * phi[DIR_00P]) - 1.0 / (rhoL + rhoDIV * phi[DIR_00M])));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi2()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi2()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi2()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::nabla2_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::nabla2_phi()
 {
 	using namespace D3Q27System;
-	LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+	real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -2896,6 +2919,8 @@ LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::nabla2_phi()
 void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::computePhasefield()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
 	int minX1 = ghostLayerWidth;
@@ -2950,16 +2975,18 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::computePhasefield()
 	}
 }
 
-void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	using namespace vf::lbm::constant;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
 	phi[DIR_000] = (*ph)(x1, x2, x3);
     if (phi[DIR_000] < 0) {
-        phi[DIR_000] = UbMath::zeroReal;
+        phi[DIR_000] = c0o1;
     }
 
 
@@ -2974,10 +3001,11 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors(CbArra
 	}
 }
 
-void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h
index c5bc3560408698d4e83a2f45fcbeaf1b5b37317d..e2dc6d86abf7525231510df4f052cfc0768df457 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h
@@ -57,69 +57,69 @@ public:
     //CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
 
 
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; }
 protected:
     virtual void initDataSet();
     void swapDistributions() override;
 
     void initForcing();
 
-    void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-
-    LBMReal f1[D3Q27System::ENDF+1];
-
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
-
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
-
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH2;
-
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr p1Old;
-
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseFieldOld;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
-
-    LBMReal h  [D3Q27System::ENDF+1];
-    LBMReal h2[D3Q27System::ENDF + 1];
-    LBMReal g  [D3Q27System::ENDF+1];
-    LBMReal phi[D3Q27System::ENDF+1];
-    LBMReal phi2[D3Q27System::ENDF + 1];
-    LBMReal pr1[D3Q27System::ENDF+1];
-    LBMReal phi_cutoff[D3Q27System::ENDF+1];
-
-    LBMReal gradX1_phi();
-    LBMReal gradX2_phi();
-    LBMReal gradX3_phi();
-	LBMReal gradX1_rhoInv(LBMReal rhoL, LBMReal rhoDIV);
-	LBMReal gradX2_rhoInv(LBMReal rhoL, LBMReal rhoDIV);
-	LBMReal gradX3_rhoInv(LBMReal rhoL, LBMReal rhoDIV);
-    LBMReal gradX1_phi2();
-    LBMReal gradX2_phi2();
-    LBMReal gradX3_phi2();
+    void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+
+    real f1[D3Q27System::ENDF+1];
+
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH2;
+
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr p1Old;
+
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseFieldOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
+
+    real h  [D3Q27System::ENDF+1];
+    real h2[D3Q27System::ENDF + 1];
+    real g  [D3Q27System::ENDF+1];
+    real phi[D3Q27System::ENDF+1];
+    real phi2[D3Q27System::ENDF + 1];
+    real pr1[D3Q27System::ENDF+1];
+    real phi_cutoff[D3Q27System::ENDF+1];
+
+    real gradX1_phi();
+    real gradX2_phi();
+    real gradX3_phi();
+	real gradX1_rhoInv(real rhoL, real rhoDIV);
+	real gradX2_rhoInv(real rhoL, real rhoDIV);
+	real gradX3_rhoInv(real rhoL, real rhoDIV);
+    real gradX1_phi2();
+    real gradX2_phi2();
+    real gradX3_phi2();
     void computePhasefield();
-    void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-    void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+    void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+    void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
 
-    LBMReal nabla2_phi();
+    real nabla2_phi();
 
     mu::value_type muX1,muX2,muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
     mu::value_type muRho;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp
index db1397374771efd414bdeccbefe605b810cf449b..2ece81e93758e0e4923d44c2f3498ea1bdbdd67b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp
@@ -82,23 +82,27 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsCumulantLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -106,20 +110,24 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsCumulantLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -129,13 +137,15 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsCumulantLBMKernel::clone()
 void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+//    using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::lbm::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = 1.0;
+	real oneOverInterfaceScale = 1.0;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -165,12 +175,12 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
     //TODO
 	//very expensive !!!!!
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2(
+        new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 
         for (int x3 = 0; x3 <= maxX3; x3++) {
@@ -181,34 +191,34 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -256,7 +266,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
         for (int x3 = minX3; x3 < maxX3; x3++) {
@@ -291,53 +301,53 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                         findNeighbors(phaseField, x1, x2, x3);
 						findNeighbors2(phaseField2, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
-
-						LBMReal dX1_phi2 = gradX1_phi2();
-						LBMReal dX2_phi2 = gradX2_phi2();
-						LBMReal dX3_phi2 = gradX3_phi2();
-
-
-                        LBMReal denom2 = sqrt(dX1_phi * dX1_phi+ dX1_phi2 * dX1_phi2 + dX2_phi * dX2_phi + dX2_phi2 * dX2_phi2 + dX3_phi * dX3_phi+ dX3_phi2 * dX3_phi2) + 1e-9;
-                        LBMReal normX1 = (dX1_phi-dX1_phi2)/denom2;
-						LBMReal normX2 = (dX2_phi-dX2_phi2)/denom2;
-						LBMReal normX3 = (dX3_phi-dX3_phi2)/denom2;
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
+
+						real dX1_phi2 = gradX1_phi2();
+						real dX2_phi2 = gradX2_phi2();
+						real dX3_phi2 = gradX3_phi2();
+
+
+                        real denom2 = sqrt(dX1_phi * dX1_phi+ dX1_phi2 * dX1_phi2 + dX2_phi * dX2_phi + dX2_phi2 * dX2_phi2 + dX3_phi * dX3_phi+ dX3_phi2 * dX3_phi2) + 1e-9;
+                        real normX1 = (dX1_phi-dX1_phi2)/denom2;
+						real normX2 = (dX2_phi-dX2_phi2)/denom2;
+						real normX3 = (dX3_phi-dX3_phi2)/denom2;
 
 						//LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
 						//LBMReal normX1 = dX1_phi / denom;
@@ -349,59 +359,59 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
                             			   ////Incompressible Kernal
 
-						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3;
-						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3;
-						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3;
-						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3;
-						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3;
-						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3;
-						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3;
-						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3;
-						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3;
-						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3;
-						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3;
-						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3;
-						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3;
-
-						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3;
-						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3;
-						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3;
-						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3;
-						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3;
-						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3;
-						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3;
-						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3;
-						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3;
-						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3;
-						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3;
-						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3;
-						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3;
-
-						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3;
-
-
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3o1;
+						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3o1;
+						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3o1;
+						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3o1;
+						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3o1;
+						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3o1;
+						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3o1;
+						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3o1;
+						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3o1;
+						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3o1;
+						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3o1;
+						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3o1;
+						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3o1;
+
+						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3o1;
+						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3o1;
+						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3o1;
+						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3o1;
+						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3o1;
+						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3o1;
+						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3o1;
+						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3o1;
+						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3o1;
+						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3o1;
+						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3o1;
+						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3o1;
+						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3o1;
+
+						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3o1;
+
+
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal 
 			//    LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 			// 	   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 			// 	   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -434,7 +444,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //////classic source term
 			   ///----Classic source term 8.4.2021
 
-			   LBMReal vvxF, vvyF, vvzF;
+			   real vvxF, vvyF, vvzF;
 			   vvxF = vvx;//-2*c1o24 * lap_vx;// 
 			   vvyF = vvy;//-2*c1o24 * lap_vy;// 
 			   vvzF = vvz;//-2*c1o24 * lap_vz;// 
@@ -445,10 +455,10 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //if (vvxF != vvx) {
 				  // vvxF = vvxF;
 			   //}
-			   LBMReal weightGrad = 1.0;// -denom * denom / (denom * denom + 0.0001 * 0.001);
-			   LBMReal dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
-			   LBMReal dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
-			   LBMReal dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
+			   real weightGrad = 1.0;// -denom * denom / (denom * denom + 0.0001 * 0.001);
+			   real dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
+			   real dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
+			   real dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
 
 			   //dX1_phiF *= 1.2;
 			   //dX2_phiF *= 1.2;
@@ -461,17 +471,17 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 				  // dX2_phiF = gradPhi * normX2;
 				  // dX3_phiF = gradPhi * normX3;
 
-			   LBMReal ux2;
-			   LBMReal uy2;
-			   LBMReal uz2;
+			   real ux2;
+			   real uy2;
+			   real uz2;
 			   ux2 = vvxF * vvxF;
 			   uy2 = vvyF * vvyF;
 			   uz2 = vvzF * vvzF;
-			   LBMReal forcingTerm[D3Q27System::ENDF + 1];
+			   real forcingTerm[D3Q27System::ENDF + 1];
 			   for (int dir = FSTARTDIR; dir <= FENDDIR; dir++) {
-				   LBMReal velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
-				   LBMReal velSq1 = velProd * velProd;
-				   LBMReal gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
+				   real velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
+				   real velSq1 = velProd * velProd;
+				   real gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
 
 				   //LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
 
@@ -484,7 +494,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 					  // (DX3[dir]) * (fac1 * dX3_phiF);
 
 
-				   LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 ;
+				   real fac1 = (gamma - WEIGTH[dir]) * c1o3 ;
 
 				   forcingTerm[dir] =
 					   (-vvxF) * (fac1 * (dX1_phiF * rhoH + dX2_phi2 * rhoL)) +
@@ -498,8 +508,8 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
 			   }
 
-			   LBMReal gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
-			   LBMReal fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
+			   real gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
+			   real fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
 			   forcingTerm[DIR_000] =	 (-vvxF) * (fac1 * (dX1_phiF * rhoH + dX2_phi2 * rhoL)) +
 				   (-vvyF) * (fac1 * (dX2_phiF * rhoH + dX2_phi2 * rhoL)) +
 				   (-vvzF) * (fac1 * (dX3_phiF * rhoH + dX3_phi2 * rhoL));
@@ -643,9 +653,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   ///////////////////////////////////////////////////////////////////////////////////////////
 			   if (withForcing)
 			   {
-				   muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-				   muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-				   muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+				   muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+				   muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+				   muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 				   //forcingX1 = muForcingX1.Eval();
 				   //forcingX2 = muForcingX2.Eval();
@@ -656,14 +666,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 				   //vvz += forcingX3 * deltaT * 0.5; // Z
 			   }
 
-			   LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+			   real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx * vvx;
                vy2 = vvy * vvy;
                vz2 = vvz * vvz;
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -693,8 +703,8 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -925,23 +935,23 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Cumulants
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			 //  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 			 //  LBMReal OxyyMxzz  = 1.;//2+s9;//
-			   LBMReal O4 = 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			   real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
 			//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -949,21 +959,21 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -977,10 +987,10 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 				mxxPyyPzz-=mfaaa;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -995,13 +1005,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                //mfbab += c1o6 * (dX1_phi * vvz + dX3_phi * vvx) * correctionScaling;
                //mfbba += c1o6 * (dX1_phi * vvy + dX2_phi * vvx) * correctionScaling;
 
-			   LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 
 			   //relax
@@ -1034,14 +1044,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1075,12 +1085,12 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //CUMbbc += O4 * (-CUMbbc);
 			   //CUMbcb += O4 * (-CUMbcb);
 			   //CUMcbb += O4 * (-CUMcbb);
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 			   //5.
 			   CUMbcc += O5 * (-CUMbcc);
@@ -1100,9 +1110,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2260,7 +2270,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 				{
 			   mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2324,7 +2334,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2339,26 +2349,26 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2367,66 +2377,66 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
 			  // LBMReal ccx, ccy, ccz;
 			   
 
-               cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-                    normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-                    normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-                    normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+                    normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+                    normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+                    normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
 			   //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2438,9 +2448,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2457,13 +2467,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2478,14 +2488,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2503,39 +2513,39 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2640,7 +2650,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 	 //  LBMReal vvz = uz;
 	   ////////////////////////////////////////////////////////////////////////////////////
 	   // second component
-   LBMReal concentration =
+   real concentration =
 	   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 	   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 		   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2655,26 +2665,26 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
   // vvy += fy * c1o2;
   // vvz += fz * c1o2;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oneMinusRho = c1 - concentration;
+   real oneMinusRho = c1o1 - concentration;
 
-   LBMReal cx =
+   real cx =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 	   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 		   (mfcbb - mfabb));
-   LBMReal cy =
+   real cy =
 	   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 	   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 		   (mfbcb - mfbab));
-   LBMReal cz =
+   real cz =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 	   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 		   (mfbbc - mfbba));
 
    ////////////////////////////////////////////////////////////////////////////////////
    // calculate the square of velocities for this lattice node
-   LBMReal cx2 = cx * cx;
-   LBMReal cy2 = cy * cy;
-   LBMReal cz2 = cz * cz;
+   real cx2 = cx * cx;
+   real cy2 = cy * cy;
+   real cz2 = cz * cz;
    ////////////////////////////////////////////////////////////////////////////////////
    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2683,63 +2693,63 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    //! - experimental Cumulant ... to be published ... hopefully
    //!
 
    // linearized orthogonalization of 3rd order central moments
-   LBMReal Mabc = mfabc - mfaba * c1o3;
-   LBMReal Mbca = mfbca - mfbaa * c1o3;
-   LBMReal Macb = mfacb - mfaab * c1o3;
-   LBMReal Mcba = mfcba - mfaba * c1o3;
-   LBMReal Mcab = mfcab - mfaab * c1o3;
-   LBMReal Mbac = mfbac - mfbaa * c1o3;
+   real Mabc = mfabc - mfaba * c1o3;
+   real Mbca = mfbca - mfbaa * c1o3;
+   real Macb = mfacb - mfaab * c1o3;
+   real Mcba = mfcba - mfaba * c1o3;
+   real Mcab = mfcab - mfaab * c1o3;
+   real Mbac = mfbac - mfbaa * c1o3;
    // linearized orthogonalization of 5th order central moments
-   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-   LBMReal Mccb = mfccb - mfaab * c1o9;
+   real Mcbc = mfcbc - mfaba * c1o9;
+   real Mbcc = mfbcc - mfbaa * c1o9;
+   real Mccb = mfccb - mfaab * c1o9;
 
    // collision of 1st order moments
-   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-	   normX1 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-	   normX2 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-	   normX3 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+	   normX1 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+	   normX2 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+	   normX3 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
    //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
    //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2751,9 +2761,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    cz2 = cz * cz;
 
    // equilibration of 2nd order moments
-   mfbba = zeroReal;
-   mfbab = zeroReal;
-   mfabb = zeroReal;
+   mfbba = c0o1;
+   mfbab = c0o1;
+   mfabb = c0o1;
 
    mfcaa = c1o3 * concentration;
    mfaca = c1o3 * concentration;
@@ -2770,13 +2780,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
    // equilibration of 3rd order moments
-   Mabc = zeroReal;
-   Mbca = zeroReal;
-   Macb = zeroReal;
-   Mcba = zeroReal;
-   Mcab = zeroReal;
-   Mbac = zeroReal;
-   mfbbb = zeroReal;
+   Mabc = c0o1;
+   Mbca = c0o1;
+   Macb = c0o1;
+   Mcba = c0o1;
+   Mcab = c0o1;
+   Mbac = c0o1;
+   mfbbb = c0o1;
 
    // from linearized orthogonalization 3rd order central moments to central moments
    mfabc = Mabc + mfaba * c1o3;
@@ -2791,14 +2801,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    mfcac = c1o9 * concentration;
    mfcca = c1o9 * concentration;
 
-   mfcbb = zeroReal;
-   mfbcb = zeroReal;
-   mfbbc = zeroReal;
+   mfcbb = c0o1;
+   mfbcb = c0o1;
+   mfbbc = c0o1;
 
    // equilibration of 5th order moments
-   Mcbc = zeroReal;
-   Mbcc = zeroReal;
-   Mccb = zeroReal;
+   Mcbc = c0o1;
+   Mbcc = c0o1;
+   Mccb = c0o1;
 
    // from linearized orthogonalization 5th order central moments to central moments
    mfcbc = Mcbc + mfaba * c1o9;
@@ -2816,39 +2826,39 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2981,9 +2991,11 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -2994,9 +3006,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -3007,9 +3021,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -3020,9 +3036,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi2()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
@@ -3033,9 +3051,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi2()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
@@ -3046,9 +3066,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi2()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
@@ -3063,10 +3085,12 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi2()
 
 
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::nabla2_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -3088,6 +3112,8 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::nabla2_phi()
 void MultiphaseTwoPhaseFieldsCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3142,12 +3168,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
-    using namespace D3Q27System;
+	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
-    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
+	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
     phi[DIR_000] = (*ph)(x1, x2, x3);
 
@@ -3162,10 +3189,11 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal,
     }
 }
 
-void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h
index a65fe073fc18258f518f72df97e6e8751adc4479..dbc94d613c4683fb19cb92a7ab7d075da41ab231 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h
@@ -51,64 +51,64 @@ public:
    virtual ~MultiphaseTwoPhaseFieldsCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; } // this kernel always reports 0; no timing is measured here
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
 
-   //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
+   //CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal phi2[D3Q27System::ENDF + 1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
+   real h  [D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real phi2[D3Q27System::ENDF + 1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
 
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
-   LBMReal gradX1_phi2();
-   LBMReal gradX2_phi2();
-   LBMReal gradX3_phi2();
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
+   real gradX1_phi2();
+   real gradX2_phi2();
+   real gradX3_phi2();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-   void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp
index 3baddc4fef5447c83b242727276fd0ec7b64c206..0d49dc68a5e331da2b30a906b2adebb3e1eadb7b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp
@@ -39,6 +39,7 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include <cmath>
+//#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
@@ -52,8 +53,8 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::initDataSet()
     SPtr<DistributionArray3D> h2(new D3Q27EsoTwist3DSplittedVector(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9)); // For phase-field
     //SPtr<PhaseFieldArray3D> divU(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
     dataSet->setFdistributions(f);
     dataSet->setHdistributions(h); // For phase-field
     dataSet->setH2distributions(h2); // For phase-field
@@ -61,9 +62,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::initDataSet()
 	dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
-	phaseField2 = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	phaseField2 = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 
 }
 //////////////////////////////////////////////////////////////////////////
@@ -95,23 +96,27 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath; // superseded by vf::lbm::constant (c2 -> c2o1)
+	using namespace vf::lbm::constant; // c2o1 used below is expected to come from this namespace
+	// Transforms (mfa, mfb, mfc) in place: mfa receives the plain sum of the three inputs; mfb and mfc are rebuilt from Kinverse/K-scaled combinations with vv and v2.
+	real m2 = mfa + mfc; // sum of the two outer entries
+	real m1 = mfc - mfa; // difference of the two outer entries
+	real m0 = m2 + mfb; // sum of all three entries
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; // uses the rescaled m0 (m0 * Kinverse + oneMinusRho), not the raw sum stored in mfa
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	 
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -119,20 +124,24 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath; // superseded by vf::lbm::constant (c2 -> c2o1)
+	using namespace vf::lbm::constant; // c2o1 used below is expected to come from this namespace
+	// Transforms (mfa, mfb, mfc) in place: mfa receives the sum of the three inputs, mfb the difference (mfc - mfa) shifted by vv times that sum, mfc the vv/v2-weighted combination below.
+	real m1 = (mfa + mfc) + mfb; // sum of all three entries
+	real m2 = mfc - mfa; // difference of the two outer entries
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); // must be computed before mfa is overwritten further down
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	 
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -142,13 +151,15 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::clone()
 void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::lbm::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -163,7 +174,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
     nonLocalDistributionsH2 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getH2distributions())->getNonLocalDistributions();
     zeroDistributionsH2     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getH2distributions())->getZeroDistributions();
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -182,7 +193,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
     //TODO
 	//very expensive !!!!!
-	//CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
+	//CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
  //           new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
  //   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
  //       new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
@@ -198,34 +209,34 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 						(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -301,16 +312,16 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 						 mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal rhoH = 1.0;
-						LBMReal rhoL = 1.0 / densityRatio;
+						real rhoH = 1.0;
+						real rhoL = 1.0 / densityRatio;
 
-						LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+						real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-						LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
+						real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
 						//! variable density -> TRANSFER!
 						//LBMReal rho = rhoH * ((*phaseField)(x1, x2, x3)) + rhoL * ((*phaseField2)(x1, x2, x3));
 
@@ -324,7 +335,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
 		////filter
@@ -385,7 +396,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 				for (int x1 = minX1-1; x1 <= maxX1; x1++) {
 					if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-						LBMReal sum = 0.;
+						real sum = 0.;
 
 
 
@@ -444,9 +455,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 									int zzz = zz + x3;
 									
 									if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-										sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
+										sum+= 64.0/(216.0*(c1o1+c3o1*abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
 									}
-									else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressureOld)(x1, x2, x3);
+									else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressureOld)(x1, x2, x3);
 									}
 
 
@@ -512,43 +523,43 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                         findNeighbors(phaseField, x1, x2, x3);
 						findNeighbors2(phaseField2, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
 
 						//LBMReal dX1_phi2 = gradX1_phi2();
 						//LBMReal dX2_phi2 = gradX2_phi2();
@@ -560,20 +571,20 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 						//LBMReal normX2 = (dX2_phi-dX2_phi2)/denom2;
 						//LBMReal normX3 = (dX3_phi-dX3_phi2)/denom2;
 
-						LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-						LBMReal normX1 = dX1_phi / denom;
-						LBMReal normX2 = dX2_phi / denom;
-						LBMReal normX3 = dX3_phi / denom;
+						real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+						real normX1 = dX1_phi / denom;
+						real normX2 = dX2_phi / denom;
+						real normX3 = dX3_phi / denom;
 
 
 
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
 						//! variable density -> TRANSFER!
 						//LBMReal rho = rhoH * ((*phaseField)(x1, x2, x3)) + rhoL * ((*phaseField2)(x1, x2, x3));
@@ -611,21 +622,21 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 						//mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);// / rho * c3;
 
 
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal 
 			//    LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 			// 	   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 			// 	   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -660,59 +671,59 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 				  // + WEIGTH[DIR_P00] * ((*pressure)(x1, x2, x3+1) - (*pressure)(x1, x2, x3-1)));
 			  
 			   
-			   LBMReal gradPx = 0.0;
-			   LBMReal gradPy = 0.0;
-			   LBMReal gradPz = 0.0;
+			   real gradPx = 0.0;
+			   real gradPy = 0.0;
+			   real gradPz = 0.0;
 			   for (int dir1 = -1; dir1 <= 1; dir1++) {
 				   for (int dir2 = -1; dir2 <= 1; dir2++) {
 					   int yyy = x2 + dir1;
 					   int zzz = x3 + dir2;
 					   if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 + 1, yyy, zzz)) {
-						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   int xxx = x1 + dir1;
 					   if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2+1, zzz)) {
-						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   yyy = x2 + dir2;
 					   if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 				   }
 			   }
 
 			   //Viscosity increase by pressure gradient
-			   LBMReal errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
+			   real errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
 			   //LBMReal limVis = 0.0000001*10;//0.01;
 			  // collFactorM =collFactorM/(c1+limVis*(errPhi*errPhi)*collFactorM);
 			  // collFactorM = (collFactorM < 1.8) ? 1.8 : collFactorM;
@@ -985,14 +996,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 				  // //vvz += forcingX3 * deltaT * 0.5; // Z
 			   //}
 
-			   LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+			   real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx * vvx;
                vy2 = vvy * vvy;
                vz2 = vvz * vvz;
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -1022,8 +1033,8 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1257,23 +1268,23 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 
 			  // mfaaa = 0.0;
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			 //  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 			 //  LBMReal OxyyMxzz  = 1.;//2+s9;//
-			   LBMReal O4 = 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-			//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			//    real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+			   real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -1281,21 +1292,21 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1317,13 +1328,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 			//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 			  mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 				//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -1338,13 +1349,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                //mfbab += c1o6 * (dX1_phi * vvz + dX3_phi * vvx) * correctionScaling;
                //mfbba += c1o6 * (dX1_phi * vvy + dX2_phi * vvx) * correctionScaling;
 
-			   LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 
 			   //relax
@@ -1382,14 +1393,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1423,12 +1434,12 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //CUMbbc += O4 * (-CUMbbc);
 			   //CUMbcb += O4 * (-CUMbcb);
 			   //CUMcbb += O4 * (-CUMcbb);
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 			   //5.
 			   CUMbcc += O5 * (-CUMbcc);
@@ -1448,9 +1459,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -1483,9 +1494,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //mfbaa += -rho * rhoToPhi * c1o2 * ((mbxx + mfcaa) * dX1_phi + (mbxy + mfbba) * dX2_phi + (mbxz + mfbab) * dX3_phi);
 			   //mfaba += -rho * rhoToPhi * c1o2 * ((mbxy + mfbba) * dX1_phi + (mbyy + mfaca) * dX2_phi + (mbyz + mfabb) * dX3_phi);
 			   //mfaab += -rho * rhoToPhi * c1o2 * ((mbxz + mfbab) * dX1_phi + (mbyz + mfabb) * dX2_phi + (mbzz + mfaac) * dX3_phi);
-			   mfbaa += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
-			   mfaba += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
-			   mfaab += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dyuy * dX3_phi) / (rho);
+			   mfbaa += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
+			   mfaba += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
+			   mfaab += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dyuy * dX3_phi) / (rho);
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //back
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1731,7 +1742,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //proof correctness
 			   //////////////////////////////////////////////////////////////////////////
 //#ifdef  PROOF_CORRECTNESS
-			   LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+			   real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 				   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 				   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 //			   //LBMReal dif = fabs(drho - rho_post);
@@ -2618,7 +2629,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 				{
 			   mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2682,7 +2693,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2697,26 +2708,26 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2725,66 +2736,66 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
 			  // LBMReal ccx, ccy, ccz;
 			   
 
-               cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-                    normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-                    normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-                    normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+                    normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+                    normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+                    normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
 				  // normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[REST]) * (phi[REST])*(phi[REST]+phi2[REST]) * c1o3 * oneOverInterfaceScale;
@@ -2803,9 +2814,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2822,13 +2833,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2843,14 +2854,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2868,39 +2879,39 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -3005,7 +3016,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 	 //  LBMReal vvz = uz;
 	   ////////////////////////////////////////////////////////////////////////////////////
 	   // second component
-   LBMReal concentration =
+   real concentration =
 	   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 	   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 		   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -3020,26 +3031,26 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
   // vvy += fy * c1o2;
   // vvz += fz * c1o2;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oneMinusRho = c1 - concentration;
+   real oneMinusRho = c1o1 - concentration;
 
-   LBMReal cx =
+   real cx =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 	   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 		   (mfcbb - mfabb));
-   LBMReal cy =
+   real cy =
 	   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 	   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 		   (mfbcb - mfbab));
-   LBMReal cz =
+   real cz =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 	   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 		   (mfbbc - mfbba));
 
    ////////////////////////////////////////////////////////////////////////////////////
    // calculate the square of velocities for this lattice node
-   LBMReal cx2 = cx * cx;
-   LBMReal cy2 = cy * cy;
-   LBMReal cz2 = cz * cz;
+   real cx2 = cx * cx;
+   real cy2 = cy * cy;
+   real cz2 = cz * cz;
    ////////////////////////////////////////////////////////////////////////////////////
    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -3048,63 +3059,63 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho); // Kinverse = 9 is the reciprocal of K = 1/9, matching the backward X-Dir transform
 
    ////////////////////////////////////////////////////////////////////////////////////
    //! - experimental Cumulant ... to be published ... hopefully
    //!
 
    // linearized orthogonalization of 3rd order central moments
-   LBMReal Mabc = mfabc - mfaba * c1o3;
-   LBMReal Mbca = mfbca - mfbaa * c1o3;
-   LBMReal Macb = mfacb - mfaab * c1o3;
-   LBMReal Mcba = mfcba - mfaba * c1o3;
-   LBMReal Mcab = mfcab - mfaab * c1o3;
-   LBMReal Mbac = mfbac - mfbaa * c1o3;
+   real Mabc = mfabc - mfaba * c1o3;
+   real Mbca = mfbca - mfbaa * c1o3;
+   real Macb = mfacb - mfaab * c1o3;
+   real Mcba = mfcba - mfaba * c1o3;
+   real Mcab = mfcab - mfaab * c1o3;
+   real Mbac = mfbac - mfbaa * c1o3;
    // linearized orthogonalization of 5th order central moments
-   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-   LBMReal Mccb = mfccb - mfaab * c1o9;
+   real Mcbc = mfcbc - mfaba * c1o9;
+   real Mbcc = mfbcc - mfbaa * c1o9;
+   real Mccb = mfccb - mfaab * c1o9;
 
    // collision of 1st order moments
-   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-	   normX1 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-	   normX2 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-	   normX3 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+	   normX1 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+	   normX2 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+	   normX3 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
    //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
    //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -3116,9 +3127,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    cz2 = cz * cz;
 
    // equilibration of 2nd order moments
-   mfbba = zeroReal;
-   mfbab = zeroReal;
-   mfabb = zeroReal;
+   mfbba = c0o1;
+   mfbab = c0o1;
+   mfabb = c0o1;
 
    mfcaa = c1o3 * concentration;
    mfaca = c1o3 * concentration;
@@ -3135,13 +3146,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
    // equilibration of 3rd order moments
-   Mabc = zeroReal;
-   Mbca = zeroReal;
-   Macb = zeroReal;
-   Mcba = zeroReal;
-   Mcab = zeroReal;
-   Mbac = zeroReal;
-   mfbbb = zeroReal;
+   Mabc = c0o1;
+   Mbca = c0o1;
+   Macb = c0o1;
+   Mcba = c0o1;
+   Mcab = c0o1;
+   Mbac = c0o1;
+   mfbbb = c0o1;
 
    // from linearized orthogonalization 3rd order central moments to central moments
    mfabc = Mabc + mfaba * c1o3;
@@ -3156,14 +3167,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    mfcac = c1o9 * concentration;
    mfcca = c1o9 * concentration;
 
-   mfcbb = zeroReal;
-   mfbcb = zeroReal;
-   mfbbc = zeroReal;
+   mfcbb = c0o1;
+   mfbcb = c0o1;
+   mfbbc = c0o1;
 
    // equilibration of 5th order moments
-   Mcbc = zeroReal;
-   Mbcc = zeroReal;
-   Mccb = zeroReal;
+   Mcbc = c0o1;
+   Mbcc = c0o1;
+   Mccb = c0o1;
 
    // from linearized orthogonalization 5th order central moments to central moments
    mfcbc = Mcbc + mfaba * c1o9;
@@ -3181,39 +3192,39 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -3346,9 +3357,11 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -3359,9 +3372,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -3372,9 +3387,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -3385,9 +3402,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi2()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
@@ -3398,9 +3417,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi2()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
@@ -3411,9 +3432,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi2()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
@@ -3428,10 +3451,12 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi2()
 
 
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::nabla2_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -3453,6 +3478,8 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::nabla2_phi()
 void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3507,10 +3534,11 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -3527,10 +3555,11 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors(CbArray3D<LB
     }
 }
 
-void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h
index 7d20f8210474b665da49c88068746a39faacfb2e..138b24410b10b4631b1411fba3e803bde504531a 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h
@@ -57,74 +57,74 @@ public:
    //CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
    
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
 
    void initForcing();
 
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
-
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2; 
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
-
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal phi2[D3Q27System::ENDF + 1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
-
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
-   LBMReal gradX1_phi2();
-   LBMReal gradX2_phi2();
-   LBMReal gradX3_phi2();
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2; 
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
+
+   real h  [D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real phi2[D3Q27System::ENDF + 1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
+
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
+   real gradX1_phi2();
+   real gradX2_phi2();
+   real gradX3_phi2();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-   void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
    mu::value_type muRho;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp
index ffed1483ca63e674b26023aca87cb63986644813..afde9ef8bca08b862210ea7dea90db349d219dca 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp
@@ -51,8 +51,8 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::initDataSet()
     SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector(nx[0] + 2, nx[1] + 2, nx[2] + 2, -999.9)); // For phase-field
     SPtr<DistributionArray3D> h2(new D3Q27EsoTwist3DSplittedVector(nx[0] + 2, nx[1] + 2, nx[2] + 2, -999.9)); // For phase-field
     SPtr<PhaseFieldArray3D> divU(new PhaseFieldArray3D(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
-	 pressure= CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
-	 pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
+	 pressure= CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
+	 pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
     dataSet->setFdistributions(f);
     dataSet->setHdistributions(h); // For phase-field
     dataSet->setH2distributions(h2); // For phase-field
@@ -84,23 +84,27 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath; // superseded by vf::lbm::constant below (c2 -> c2o1)
+	using namespace vf::lbm::constant;
+	// Rewrites (mfa, mfb, mfc) in place; the three inputs are captured in m0..m2 before any of them is overwritten.
+	real m2 = mfa + mfc; // sum of the two outer inputs
+	real m1 = mfc - mfa; // difference of the two outer inputs
+	real m0 = m2 + mfb; // sum of all three inputs; stored unscaled as the new mfa
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; // uses the scaled and shifted m0 (m0 * Kinverse + oneMinusRho)
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	 
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -108,20 +112,24 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath; // superseded by vf::lbm::constant below (c2 -> c2o1)
+	using namespace vf::lbm::constant;
+	// Rewrites (mfa, mfb, mfc) in place; m1 and m2 capture the inputs before mfc, mfb and mfa are overwritten.
+	real m1 = (mfa + mfc) + mfb; // sum of all three inputs; becomes the new mfa
+	real m2 = mfc - mfa; // difference of the two outer inputs
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); // still reads the original mfa, so it must precede "mfa = m1"
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+	 
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -131,13 +139,15 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::clone()
 void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::lbm::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = 1.0;
+	real oneOverInterfaceScale = 1.0;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -167,12 +177,12 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
     //TODO
 	//very expensive !!!!!
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2(
+        new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 #pragma omp parallel for
 	  for (int x3 = 0; x3 <= maxX3; x3++) {
@@ -183,34 +193,34 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -286,16 +296,16 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 						 mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal rhoH = 1.0;
-						LBMReal rhoL = 1.0 / densityRatio;
+						real rhoH = 1.0;
+						real rhoL = 1.0 / densityRatio;
 
-						LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+						real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-						LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+						real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 						(*pressure)(x1, x2, x3) = (*pressure)(x1, x2, x3) + rho * c1o3 * drho;
 
 						////!!!!!! relplace by pointer swap!
@@ -305,7 +315,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
 		////filter
@@ -366,7 +376,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 				for (int x1 = 0; x1 <= maxX1; x1++) {
 					if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-						LBMReal sum = 0.;
+						real sum = 0.;
 
 
 
@@ -422,9 +432,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 									int zzz = (zz+x3 <= maxX3) ? ((zz + x3 > 0) ? zz + x3 : maxX3 ): 0;
 
 									if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-										sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressure)(xxx, yyy, zzz);
+										sum+= 64.0/(216.0*(c1o1+c3o1 *abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressure)(xxx, yyy, zzz);
 									}
-									else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressure)(x1, x2, x3);
+									else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressure)(x1, x2, x3);
 									}
 
 
@@ -490,43 +500,43 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                         findNeighbors(phaseField, x1, x2, x3);
 						findNeighbors2(phaseField2, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
 
 						//LBMReal dX1_phi2 = gradX1_phi2();
 						//LBMReal dX2_phi2 = gradX2_phi2();
@@ -538,20 +548,20 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 						//LBMReal normX2 = (dX2_phi-dX2_phi2)/denom2;
 						//LBMReal normX3 = (dX3_phi-dX3_phi2)/denom2;
 
-						LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-						LBMReal normX1 = dX1_phi / denom;
-						LBMReal normX2 = dX2_phi / denom;
-						LBMReal normX3 = dX3_phi / denom;
+						real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+						real normX1 = dX1_phi / denom;
+						real normX2 = dX2_phi / denom;
+						real normX3 = dX3_phi / denom;
 
 
 
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
                             			   ////Incompressible Kernal
 
@@ -585,21 +595,21 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 						//mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);// / rho * c3;
 
 
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal 
 			//    LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 			// 	   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 			// 	   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -634,52 +644,52 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 				  // + WEIGTH[DIR_P00] * ((*pressure)(x1, x2, x3+1) - (*pressure)(x1, x2, x3-1)));
 			  
 			   
-			   LBMReal gradPx = 0.0;
-			   LBMReal gradPy = 0.0;
-			   LBMReal gradPz = 0.0;
+			   real gradPx = 0.0;
+			   real gradPy = 0.0;
+			   real gradPz = 0.0;
 			   for (int dir1 = -1; dir1 <= 1; dir1++) {
 				   for (int dir2 = -1; dir2 <= 1; dir2++) {
 					   int yyy = x2 + dir1;
 					   int zzz = x3 + dir2;
 					   if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 - 1, yyy, zzz)) {
-						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   int xxx = x1 + dir1;
 					   if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   yyy = x2 + dir2;
 					   if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 				   }
@@ -930,9 +940,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   ///////////////////////////////////////////////////////////////////////////////////////////
 			   if (withForcing)
 			   {
-				   muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-				   muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-				   muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+				   muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+				   muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+				   muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 				   //forcingX1 = muForcingX1.Eval();
 				   //forcingX2 = muForcingX2.Eval();
@@ -943,14 +953,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 				   //vvz += forcingX3 * deltaT * 0.5; // Z
 			   }
 
-			   LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+			   real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx * vvx;
                vy2 = vvy * vvy;
                vz2 = vvz * vvz;
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -980,8 +990,8 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1215,23 +1225,23 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 
 			  // mfaaa = 0.0;
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			 //  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 			 //  LBMReal OxyyMxzz  = 1.;//2+s9;//
-			   LBMReal O4 = 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-			//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			//    real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+			   real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -1239,21 +1249,21 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1267,13 +1277,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 			//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 			  mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 				//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -1288,13 +1298,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                //mfbab += c1o6 * (dX1_phi * vvz + dX3_phi * vvx) * correctionScaling;
                //mfbba += c1o6 * (dX1_phi * vvy + dX2_phi * vvx) * correctionScaling;
 
-			   LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 
 			   //relax
@@ -1332,14 +1342,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1373,12 +1383,12 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //CUMbbc += O4 * (-CUMbbc);
 			   //CUMbcb += O4 * (-CUMbcb);
 			   //CUMcbb += O4 * (-CUMcbb);
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 			   //5.
 			   CUMbcc += O5 * (-CUMbcc);
@@ -1398,9 +1408,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2557,7 +2567,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 				{
 			   mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2621,7 +2631,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2636,26 +2646,26 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2664,66 +2674,66 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
 			  // LBMReal ccx, ccy, ccz;
 			   
 
-               cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-                    normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-                    normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-                    normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+                    normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+                    normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+                    normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
 			   //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2735,9 +2745,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2754,13 +2764,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2775,14 +2785,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2800,39 +2810,39 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2937,7 +2947,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 	 //  LBMReal vvz = uz;
 	   ////////////////////////////////////////////////////////////////////////////////////
 	   // second component
-   LBMReal concentration =
+   real concentration =
 	   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 	   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 		   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2952,26 +2962,26 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
   // vvy += fy * c1o2;
   // vvz += fz * c1o2;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oneMinusRho = c1 - concentration;
+   real oneMinusRho = c1o1 - concentration;
 
-   LBMReal cx =
+   real cx =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 	   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 		   (mfcbb - mfabb));
-   LBMReal cy =
+   real cy =
 	   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 	   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 		   (mfbcb - mfbab));
-   LBMReal cz =
+   real cz =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 	   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 		   (mfbbc - mfbba));
 
    ////////////////////////////////////////////////////////////////////////////////////
    // calculate the square of velocities for this lattice node
-   LBMReal cx2 = cx * cx;
-   LBMReal cy2 = cy * cy;
-   LBMReal cz2 = cz * cz;
+   real cx2 = cx * cx;
+   real cy2 = cy * cy;
+   real cz2 = cz * cz;
    ////////////////////////////////////////////////////////////////////////////////////
    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2980,63 +2990,63 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    //! - experimental Cumulant ... to be published ... hopefully
    //!
 
    // linearized orthogonalization of 3rd order central moments
-   LBMReal Mabc = mfabc - mfaba * c1o3;
-   LBMReal Mbca = mfbca - mfbaa * c1o3;
-   LBMReal Macb = mfacb - mfaab * c1o3;
-   LBMReal Mcba = mfcba - mfaba * c1o3;
-   LBMReal Mcab = mfcab - mfaab * c1o3;
-   LBMReal Mbac = mfbac - mfbaa * c1o3;
+   real Mabc = mfabc - mfaba * c1o3;
+   real Mbca = mfbca - mfbaa * c1o3;
+   real Macb = mfacb - mfaab * c1o3;
+   real Mcba = mfcba - mfaba * c1o3;
+   real Mcab = mfcab - mfaab * c1o3;
+   real Mbac = mfbac - mfbaa * c1o3;
    // linearized orthogonalization of 5th order central moments
-   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-   LBMReal Mccb = mfccb - mfaab * c1o9;
+   real Mcbc = mfcbc - mfaba * c1o9;
+   real Mbcc = mfbcc - mfbaa * c1o9;
+   real Mccb = mfccb - mfaab * c1o9;
 
    // collision of 1st order moments
-   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-	   normX1 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-	   normX2 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-	   normX3 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+	   normX1 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+	   normX2 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+	   normX3 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
    //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
    //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -3048,9 +3058,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    cz2 = cz * cz;
 
    // equilibration of 2nd order moments
-   mfbba = zeroReal;
-   mfbab = zeroReal;
-   mfabb = zeroReal;
+   mfbba = c0o1;
+   mfbab = c0o1;
+   mfabb = c0o1;
 
    mfcaa = c1o3 * concentration;
    mfaca = c1o3 * concentration;
@@ -3067,13 +3077,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
    // equilibration of 3rd order moments
-   Mabc = zeroReal;
-   Mbca = zeroReal;
-   Macb = zeroReal;
-   Mcba = zeroReal;
-   Mcab = zeroReal;
-   Mbac = zeroReal;
-   mfbbb = zeroReal;
+   Mabc = c0o1;
+   Mbca = c0o1;
+   Macb = c0o1;
+   Mcba = c0o1;
+   Mcab = c0o1;
+   Mbac = c0o1;
+   mfbbb = c0o1;
 
    // from linearized orthogonalization 3rd order central moments to central moments
    mfabc = Mabc + mfaba * c1o3;
@@ -3088,14 +3098,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    mfcac = c1o9 * concentration;
    mfcca = c1o9 * concentration;
 
-   mfcbb = zeroReal;
-   mfbcb = zeroReal;
-   mfbbc = zeroReal;
+   mfcbb = c0o1;
+   mfbcb = c0o1;
+   mfbbc = c0o1;
 
    // equilibration of 5th order moments
-   Mcbc = zeroReal;
-   Mbcc = zeroReal;
-   Mccb = zeroReal;
+   Mcbc = c0o1;
+   Mbcc = c0o1;
+   Mccb = c0o1;
 
    // from linearized orthogonalization 5th order central moments to central moments
    mfcbc = Mcbc + mfaba * c1o9;
@@ -3113,39 +3123,39 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -3278,9 +3288,11 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -3291,9 +3303,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -3304,9 +3318,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -3317,9 +3333,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi2()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
@@ -3330,9 +3348,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi2()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
@@ -3343,9 +3363,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi2()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
@@ -3360,10 +3382,12 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi2()
 
 
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::nabla2_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -3385,6 +3409,8 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::nabla2_phi()
 void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3439,10 +3465,11 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -3459,10 +3486,11 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors(CbArray3D<
     }
 }
 
-void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h
index 070aff23ff78d079d12806b529a750b007ae7137..a34858ae47c0cb5b10755b21df14290fa242115a 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h
@@ -51,69 +51,69 @@ public:
    virtual ~MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
    ///refactor
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal phi2[D3Q27System::ENDF + 1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
-
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
-   LBMReal gradX1_phi2();
-   LBMReal gradX2_phi2();
-   LBMReal gradX3_phi2();
+   real h  [D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real phi2[D3Q27System::ENDF + 1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
+
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
+   real gradX1_phi2();
+   real gradX2_phi2();
+   real gradX3_phi2();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-   void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp b/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp
index 55f8bba509d53392c804b44e43e54970f3cf1157..79d2c26cc865ac08549a5b85bc996c0c4e9df51d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp
@@ -33,13 +33,13 @@
 #include "Rheology.h"
 
 SPtr<Rheology> Rheology::instance = SPtr<Rheology>();
-LBMReal Rheology::tau0 = 0;
-LBMReal Rheology::k = 0;
-LBMReal Rheology::n = 1;
-LBMReal Rheology::omegaMin = 0;
-LBMReal Rheology::beta = 0;
-LBMReal Rheology::c = 0;
-LBMReal Rheology::mu0 = 0;
+real Rheology::tau0 = 0;
+real Rheology::k = 0;
+real Rheology::n = 1;
+real Rheology::omegaMin = 0;
+real Rheology::beta = 0;
+real Rheology::c = 0;
+real Rheology::mu0 = 0;
 
 //////////////////////////////////////////////////////////////////////////
 SPtr<Rheology> Rheology::getInstance()
@@ -49,66 +49,66 @@ SPtr<Rheology> Rheology::getInstance()
    return instance;
 }
 
-void Rheology::setYieldStress(LBMReal yieldStress)
+void Rheology::setYieldStress(real yieldStress)
 {
 	tau0 = yieldStress;
 }
-LBMReal Rheology::getYieldStress() const
+real Rheology::getYieldStress() const
 {
 	return tau0;
 }
-void Rheology::setViscosityParameter(LBMReal kParameter)
+void Rheology::setViscosityParameter(real kParameter)
 {
 	k = kParameter;
 }
-LBMReal Rheology::getViscosityParameter() const
+real Rheology::getViscosityParameter() const
 {
 	return k;
 }
-void Rheology::setPowerIndex(LBMReal index)
+void Rheology::setPowerIndex(real index)
 {
 	n = index;
 }
-LBMReal Rheology::getPowerIndex() const
+real Rheology::getPowerIndex() const
 {
 	return n;
 }
 
-void Rheology::setOmegaMin(LBMReal omega)
+void Rheology::setOmegaMin(real omega)
 {
 	omegaMin = omega;
 }
-LBMReal Rheology::getOmegaMin() const
+real Rheology::getOmegaMin() const
 {
 	return omegaMin;
 }
 
-void Rheology::setBeta(LBMReal PowellEyringBeta)
+void Rheology::setBeta(real PowellEyringBeta)
 {
 	beta = PowellEyringBeta;
 }
 
-LBMReal Rheology::getBeta() const
+real Rheology::getBeta() const
 {
 	return beta;
 }
 
-void Rheology::setC(LBMReal PowellEyringC)
+void Rheology::setC(real PowellEyringC)
 {
 	c = PowellEyringC;
 }
 
-LBMReal Rheology::getC() const
+real Rheology::getC() const
 {
 	return c;
 }
 
-void Rheology::setMu0(LBMReal mu)
+void Rheology::setMu0(real mu)
 {
 	mu0 = mu;
 }
 
-LBMReal Rheology::getMu0() const
+real Rheology::getMu0() const
 {
 	return mu0;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/Rheology.h b/src/cpu/VirtualFluidsCore/LBM/Rheology.h
index b1aa22478a69a77be2aa0275ddb7a44b6e626305..ef0efc76e170a79c644e7cc6a94f592fdc119572 100644
--- a/src/cpu/VirtualFluidsCore/LBM/Rheology.h
+++ b/src/cpu/VirtualFluidsCore/LBM/Rheology.h
@@ -38,6 +38,7 @@
 #include <LBMSystem.h>
 #include <UbMath.h>
 #include <math.h> 
+#include "lbm/constants/NumericConstants.h"
 
 class Rheology
 {
@@ -45,53 +46,53 @@ public:
 	Rheology(Rheology const&) = delete;
 	Rheology& operator=(Rheology const&) = delete;
 	static SPtr<Rheology> getInstance();
-	void setYieldStress(LBMReal tau0);
-	LBMReal getYieldStress() const;
+	void setYieldStress(real tau0);
+	real getYieldStress() const;
 	
-	void setViscosityParameter(LBMReal k);
-	LBMReal getViscosityParameter() const;
+	void setViscosityParameter(real k);
+	real getViscosityParameter() const;
 
-	void setPowerIndex(LBMReal n);
-	LBMReal getPowerIndex() const;
+	void setPowerIndex(real n);
+	real getPowerIndex() const;
 
-	void setOmegaMin(LBMReal omegaMin);
-	LBMReal getOmegaMin() const;
+	void setOmegaMin(real omegaMin);
+	real getOmegaMin() const;
 
-	void setBeta(LBMReal PowellEyringBeta);
-	LBMReal getBeta() const;
+	void setBeta(real PowellEyringBeta);
+	real getBeta() const;
 
-	void setC(LBMReal PowellEyringC);
-	LBMReal getC() const;
+	void setC(real PowellEyringC);
+	real getC() const;
 
-	void setMu0(LBMReal mu);
-	LBMReal getMu0() const;
+	void setMu0(real mu);
+	real getMu0() const;
 
-	static LBMReal getBinghamCollFactorOld(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
-	static LBMReal getBinghamCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
-	static LBMReal getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
-	static LBMReal getHerschelBulkleyCollFactorBackward(LBMReal shearRate, LBMReal drho);
-	static LBMReal getPowellEyringCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
+	static real getBinghamCollFactorOld(real omegaInf, real shearRate, real drho);
+	static real getBinghamCollFactor(real omegaInf, real shearRate, real drho);
+	static real getHerschelBulkleyCollFactor(real omegaInf, real shearRate, real drho);
+	static real getHerschelBulkleyCollFactorBackward(real shearRate, real drho);
+	static real getPowellEyringCollFactor(real omegaInf, real shearRate, real drho);
 private:
 	Rheology();
 	
 	static SPtr<Rheology> instance;
 
-	static LBMReal tau0;
-	static LBMReal k;
-	static LBMReal n;
-	static LBMReal omegaMin;
-	static LBMReal beta;
-	static LBMReal c;
-	static LBMReal mu0;
+	static real tau0;
+	static real k;
+	static real n;
+	static real omegaMin;
+	static real beta;
+	static real c;
+	static real mu0;
 };
 
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getBinghamCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getBinghamCollFactor(real omegaInf, real shearRate, real drho)
 {
-	LBMReal cs2 = UbMath::one_over_sqrt3 * UbMath::one_over_sqrt3;
-	LBMReal rho = UbMath::one + drho;
+	real cs2 = vf::lbm::constant::one_over_sqrt3 * vf::lbm::constant::one_over_sqrt3;
+	real rho = vf::lbm::constant::c1o1 + drho;
 	//analytical solution
-	LBMReal omega = omegaInf * (UbMath::one - (omegaInf * tau0) / (shearRate * cs2 * rho + UbMath::Epsilon<LBMReal>::val()));
+	real omega = omegaInf * (vf::lbm::constant::c1o1 - (omegaInf * tau0) / (shearRate * cs2 * rho + UbMath::Epsilon<real>::val()));
 	
 	//LBMReal omega = cs2 * cs2 * shearRate * shearRate * omegaInf * rho * rho / (cs2 * cs2 * shearRate * shearRate * rho * rho + cs2 * shearRate * omegaInf * rho * tau0+omegaInf*omegaInf*tau0*tau0);
 	
@@ -117,30 +118,30 @@ inline LBMReal Rheology::getBinghamCollFactor(LBMReal omegaInf, LBMReal shearRat
 	return omega;
 }
 
-inline LBMReal Rheology::getBinghamCollFactorOld(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getBinghamCollFactorOld(real omegaInf, real shearRate, real drho)
 {
-	const LBMReal cs2 = UbMath::c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
-	LBMReal rho = UbMath::one + drho;
+	const real cs2 = vf::lbm::constant::c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
+	real rho = vf::lbm::constant::c1o1 + drho;
 
-	if (rho * cs2 * (UbMath::c1 / omegaInf - UbMath::c1o2) * shearRate < tau0)
+	if (rho * cs2 * (vf::lbm::constant::c1o1 / omegaInf - vf::lbm::constant::c1o2) * shearRate < tau0)
 		return 0.0;
 	else
 		return omegaInf;
 }
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getHerschelBulkleyCollFactor(real omegaInf, real shearRate, real drho)
 {
-	LBMReal cs2 = UbMath::one_over_sqrt3 * UbMath::one_over_sqrt3;
-	LBMReal rho = UbMath::one + drho;
-	LBMReal gammaDot = shearRate;
-	LBMReal omega = omegaInf;
-	LBMReal epsilon = 1;
-	LBMReal gammaDotPowN = std::pow(gammaDot, n);
+	real cs2 = vf::lbm::constant::one_over_sqrt3 * vf::lbm::constant::one_over_sqrt3;
+	real rho = vf::lbm::constant::c1o1 + drho;
+	real gammaDot = shearRate;
+	real omega = omegaInf;
+	real epsilon = 1;
+	real gammaDotPowN = std::pow(gammaDot, n);
 
 	while (epsilon > 1e-10)
 	{
-		LBMReal omegaOld = omega;
-		LBMReal omegaByOmegaInfPowN = std::pow(omega / omegaInf, n);/*
+		real omegaOld = omega;
+		real omegaByOmegaInfPowN = std::pow(omega / omegaInf, n);/*
 		LBMReal gammaDotPowOneMinusN = std::pow(gammaDot,1- n);
 		LBMReal omegaByOmegaInfPowOneMinusN = std::pow(omega / omegaInf, 1-n);
 		LBMReal numeratorA = (2.0* k *  omegaInf + cs2 * gammaDotPowOneMinusN * omegaByOmegaInfPowOneMinusN *omegaInf* rho );
@@ -148,10 +149,10 @@ inline LBMReal Rheology::getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal
 		LBMReal denominatorA = (2.0 * k * n * omegaInf + cs2 * gammaDot * rho * omegaInf* gammaDotPowOneMinusN * omegaByOmegaInfPowOneMinusN) + UbMath::Epsilon<LBMReal>::val();
 		LBMReal denominatorB = (2.0 * k * n * gammaDotPowN * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * rho * omega) + UbMath::Epsilon<LBMReal>::val();
 		omega = omega - omega *( numeratorA / denominatorA+ numeratorB / denominatorB);*/
-		LBMReal numerator = (2.0 * gammaDotPowN * k * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * (omega - 2.0) * rho + 2.0 * omegaInf * tau0);
-		LBMReal denominator = (2.0 * k * n * gammaDotPowN * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * rho * omega) + UbMath::Epsilon<LBMReal>::val();
+		real numerator = (2.0 * gammaDotPowN * k * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * (omega - 2.0) * rho + 2.0 * omegaInf * tau0);
+		real denominator = (2.0 * k * n * gammaDotPowN * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * rho * omega) + UbMath::Epsilon<real>::val();
 		omega = omega - omega * numerator / denominator;
-		omega = (omega < UbMath::zeroReal) ? UbMath::c1o2 * omegaOld : omega;
+		omega = (omega < vf::lbm::constant::c0o1) ? vf::lbm::constant::c1o2 * omegaOld : omega;
         //omega = (omega < omegaMin) ? UbMath::c1o2 * (omegaOld-omegaMin)+omegaMin : omega;
 		epsilon = std::abs(omega - omegaOld);
 	}
@@ -159,36 +160,38 @@ inline LBMReal Rheology::getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal
 	return omega;
 }
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getHerschelBulkleyCollFactorBackward(LBMReal shearRate, LBMReal drho)
+inline real Rheology::getHerschelBulkleyCollFactorBackward(real shearRate, real drho)
 {
-	LBMReal rho = UbMath::one + drho;
-	LBMReal gamma = shearRate + UbMath::Epsilon<LBMReal>::val();
-	LBMReal cs2 = UbMath::one_over_sqrt3 * UbMath::one_over_sqrt3;
+	real rho = vf::lbm::constant::c1o1 + drho;
+	real gamma = shearRate + UbMath::Epsilon<real>::val();
+	real cs2 = vf::lbm::constant::one_over_sqrt3 * vf::lbm::constant::one_over_sqrt3;
 
-	return 1.0 / ((tau0 + k * std::pow(gamma, n)) / (cs2 * rho * gamma) + UbMath::c1o2);
+	return 1.0 / ((tau0 + k * std::pow(gamma, n)) / (cs2 * rho * gamma) + vf::lbm::constant::c1o2);
 }
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getPowellEyringCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getPowellEyringCollFactor(real omegaInf, real shearRate, real drho)
 {
-	using namespace UbMath;
-	LBMReal cs2 = c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
-	LBMReal rho = c1 + drho;
-	LBMReal gammaDot = shearRate;
-	LBMReal omega = omegaInf;
-	LBMReal epsilon = 1;
+//	using namespace UbMath;
+	using namespace vf::lbm::constant;
+
+	real cs2 = c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
+	real rho = c1o1 + drho;
+	real gammaDot = shearRate;
+	real omega = omegaInf;
+	real epsilon = 1;
 
 	while (epsilon > 1e-10)
 	{
-		LBMReal omegaOld = omega;
-		epsilon = std::abs(omega - omegaOld);
+		real omegaOld = omega;
 
-		LBMReal numerator = c*sqrt(c1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(beta*(c2*gammaDot*mu0*omega+cs2*gammaDot*(omega-c2)*rho+c2*omegaInf*tau0)+c2*omegaInf*(asinh((gammaDot*omega)/(c*omegaInf))));
+		real numerator = c*sqrt(c1o1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(beta*(c2o1*gammaDot*mu0*omega+cs2*gammaDot*(omega-c2o1)*rho+c2o1*omegaInf*tau0)+c2o1*omegaInf*(asinh((gammaDot*omega)/(c*omegaInf))));
 
-		LBMReal denominator = gammaDot*(c2+beta*c*sqrt(c1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(c2*mu0+cs2*rho)) + UbMath::Epsilon<LBMReal>::val();
+		real denominator = gammaDot*(c2o1+beta*c*sqrt(c1o1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(c2o1*mu0+cs2*rho)) + UbMath::Epsilon<real>::val();
 
 		omega = omega - numerator / denominator;
 
-		omega = (omega < UbMath::zeroReal) ? UbMath::c1o2 * omegaOld : omega;
+		omega = (omega < c0o1) ? c1o2 * omegaOld : omega;
+		epsilon = std::abs(omega - omegaOld); // residual of this Newton step; must be taken after the update or it is always 0 and the loop stops after one pass
 	}
 
 	return omega;
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h
index 5bd2601dad811be6433eaea1d6acafc0a3e54f4c..da51e6c4e11f67ff48efe7a34c7eeaf900e9e730 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h
@@ -61,7 +61,7 @@ public:
 		return kernel;
 	}
 protected:	
-	LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+	real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override
 	{
 		return Rheology::getBinghamCollFactor(omegaInf, shearRate, drho);
 	}
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h
index 57478041e04e6a07579be53d58b688866e964e75..2422efefd52cdbfac183a9fdd19b9b2f5a5fee70 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h
@@ -60,7 +60,7 @@ public:
 		return kernel;
 	}
 protected:
-	LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+	real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override
 	{
 		return Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, drho);
 	}
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp
index 09cd40c8eceb10fa57ba136ea5f1439211f928ab..0a9c380dece3ba90f7d2d3d5d2a84ceadfcf4850 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp
@@ -42,7 +42,7 @@ RheologyInterpolationProcessor::RheologyInterpolationProcessor()
 
 }
 //////////////////////////////////////////////////////////////////////////
-RheologyInterpolationProcessor::RheologyInterpolationProcessor(LBMReal omegaC, LBMReal omegaF, LBMReal omegaMin)
+RheologyInterpolationProcessor::RheologyInterpolationProcessor(real omegaC, real omegaF, real omegaMin)
    : omegaC(omegaC), omegaF(omegaF), omegaMin(omegaMin)
 {
 
@@ -59,18 +59,18 @@ InterpolationProcessorPtr RheologyInterpolationProcessor::clone()
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void RheologyInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::setOmegaMin( LBMReal omegaMin )
+void RheologyInterpolationProcessor::setOmegaMin( real omegaMin )
 {
    this->omegaMin = omegaMin;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void RheologyInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -80,7 +80,7 @@ void RheologyInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMR
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
     setOffsets(xoff, yoff, zoff);
     calcInterpolatedCoefficiets_intern(icellC, omegaC, 0.5, 0.25, -0.25, -0.25, -1, -1, -1);
@@ -101,60 +101,61 @@ void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC,
     calcInterpolatedNode(icellF.TNE, /*omegaF,*/  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
     calcInterpolatedCoefficiets_intern(icellF, omegaF, 2.0, 0, 0, 0, 0, 0, 0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omegaInf, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void RheologyInterpolationProcessor::calcMoments(const real* const f, real omegaInf, real& press, real& vx1, real& vx2, real& vx3, real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    rho = 0.0;
    D3Q27System::calcIncompMacroscopicValues(f,rho,vx1,vx2,vx3);
 
    shearRate = D3Q27System::getShearRate(f, omegaInf);
 
-   LBMReal omega = Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, rho);
+   real omega = Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, rho);
 
    press = rho; //interpolate rho!
 
    kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))-(vx1*vx2));// might not be optimal MG 25.2.13
    kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))-(vx2*vx3));
    kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[D3Q27System::DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
    kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
 void RheologyInterpolationProcessor::calcInterpolatedCoefficiets_intern(const D3Q27ICell& icell,
-                                                                          LBMReal omega,
-                                                                          LBMReal eps_new,
-                                                                          LBMReal x,
-                                                                          LBMReal y,
-                                                                          LBMReal z,
-                                                                          LBMReal xs,
-                                                                          LBMReal ys,
-                                                                          LBMReal zs)
+                                                                          real omega,
+                                                                          real eps_new,
+                                                                          real x,
+                                                                          real y,
+                                                                          real z,
+                                                                          real xs,
+                                                                          real ys,
+                                                                          real zs)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -309,18 +310,18 @@ void RheologyInterpolationProcessor::calcInterpolatedCoefficiets_intern(const D3
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   LBMReal dxux = ax + 0.5*axx*xs+ 0.25*(axy*ys+axz*zs)+0.0625*axyz*ys*zs;
-   LBMReal dyuy = by + 0.5 * byy * ys + 0.25 * (bxy * xs + byz * zs) + 0.0625 * bxyz * xs * zs;
-   LBMReal dzuz = cz + 0.5 * czz * zs + 0.25 * (cxz * xs + cyz * ys) + 0.0625 * cxyz * xs * ys;
+   real dxux = ax + 0.5*axx*xs+ 0.25*(axy*ys+axz*zs)+0.0625*axyz*ys*zs;
+   real dyuy = by + 0.5 * byy * ys + 0.25 * (bxy * xs + byz * zs) + 0.0625 * bxyz * xs * zs;
+   real dzuz = cz + 0.5 * czz * zs + 0.25 * (cxz * xs + cyz * ys) + 0.0625 * cxyz * xs * ys;
 
-   LBMReal Dxy = bx + 0.5 * bxx * xs + 0.25 * (bxy * ys + bxz * zs) + 0.0625 * bxyz * ys * zs + ay + 0.5 * ayy * ys + 0.25 * (axy * xs + ayz * zs) + 0.0625 * axyz * xs * zs;
-   LBMReal Dxz = cx + 0.5 * cxx * xs + 0.25 * (cxy * ys + cxz * zs) + 0.0625 * cxyz * ys * zs + az + 0.5 * azz * zs + 0.25 * (axz * xs + ayz * ys) + 0.0625 * axyz * xs * ys;
-   LBMReal Dyz = cy + 0.5 * cyy * ys + 0.25 * (cxy * xs + cyz * zs) + 0.0625 * cxyz * xs * zs + bz + 0.5 * bzz * zs + 0.25 * (bxz * xs + byz * ys) + 0.0625 * bxyz * xs * ys;
+   real Dxy = bx + 0.5 * bxx * xs + 0.25 * (bxy * ys + bxz * zs) + 0.0625 * bxyz * ys * zs + ay + 0.5 * ayy * ys + 0.25 * (axy * xs + ayz * zs) + 0.0625 * axyz * xs * zs;
+   real Dxz = cx + 0.5 * cxx * xs + 0.25 * (cxy * ys + cxz * zs) + 0.0625 * cxyz * ys * zs + az + 0.5 * azz * zs + 0.25 * (axz * xs + ayz * ys) + 0.0625 * axyz * xs * ys;
+   real Dyz = cy + 0.5 * cyy * ys + 0.25 * (cxy * xs + cyz * zs) + 0.0625 * cxyz * xs * zs + bz + 0.5 * bzz * zs + 0.25 * (bxz * xs + byz * ys) + 0.0625 * bxyz * xs * ys;
 
    shearRate = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz);
 
 
-   LBMReal o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
+   real o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
 
    if (o < omegaMin)
       o = omegaMin;
@@ -431,16 +432,17 @@ void RheologyInterpolationProcessor::calcInterpolatedCoefficiets_intern(const D3
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcInterpolatedNode(LBMReal* f, /*LBMReal omega,*/ LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void RheologyInterpolationProcessor::calcInterpolatedNode(real* f, /*real omega,*/ real x, real y, real z, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal rho  = press ;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+   real rho  = press ;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
    f[DIR_P00]    = f_E    + xs*x_E    + ys*y_E    + zs*z_E    + xs*ys*xy_E    + xs*zs*xz_E    + ys*zs*yz_E    + feq[DIR_P00];
@@ -473,7 +475,7 @@ void RheologyInterpolationProcessor::calcInterpolatedNode(LBMReal* f, /*LBMReal
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBSW()
+real RheologyInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -486,7 +488,7 @@ LBMReal RheologyInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTSW()
+real RheologyInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -499,7 +501,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTSE()
+real RheologyInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -512,7 +514,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBSE()
+real RheologyInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -525,7 +527,7 @@ LBMReal RheologyInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBNW()
+real RheologyInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -538,7 +540,7 @@ LBMReal RheologyInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTNW()
+real RheologyInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -551,7 +553,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTNE()
+real RheologyInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -564,7 +566,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBNE()
+real RheologyInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -577,11 +579,12 @@ LBMReal RheologyInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void RheologyInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void RheologyInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -589,30 +592,30 @@ void RheologyInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
-   LBMReal rho = press ;
+   real rho = press ;
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
-   LBMReal eps_new = 2.;
+   real eps_new = 2.;
    
 
-   LBMReal dxux = ax;
-   LBMReal dyuy = by;
-   LBMReal dzuz = cz;
+   real dxux = ax;
+   real dyuy = by;
+   real dzuz = cz;
 
-   LBMReal Dxy = bx + ay;
-   LBMReal Dxz = cx + az;
-   LBMReal Dyz = cy + bz;
+   real Dxy = bx + ay;
+   real Dxz = cx + az;
+   real Dyz = cy + bz;
 
    shearRate = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz);
 
 
-   LBMReal o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
+   real o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
 
    if (o < omegaMin)
       o = omegaMin;
@@ -661,14 +664,14 @@ void RheologyInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal
    f[DIR_000] = f_ZERO + feq[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void RheologyInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void RheologyInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h
index bce0c3d89e137738d9169fab5dbe15cd3c91f8ad..178932204307606b9fc48d2745ebf1353547e3e8 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h
@@ -42,55 +42,55 @@ class RheologyInterpolationProcessor : public InterpolationProcessor
 {
 public:
    RheologyInterpolationProcessor();
-   RheologyInterpolationProcessor(LBMReal omegaC, LBMReal omegaF, LBMReal omegaMin);
+   RheologyInterpolationProcessor(real omegaC, real omegaF, real omegaMin);
    virtual ~RheologyInterpolationProcessor();
    InterpolationProcessorPtr clone();
-   void setOmegas(LBMReal omegaC, LBMReal omegaF);
-   void setOmegaMin(LBMReal omegaMin);
+   void setOmegas(real omegaC, real omegaF);
+   void setOmegaMin(real omegaMin);
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF);
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff);
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC); 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff); 
-   //LBMReal forcingC, forcingF;
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff);
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC); 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff); 
+   //real forcingC, forcingF;
 protected:   
 private:
-   LBMReal omegaC, omegaF;
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC, omegaF;
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
-   LBMReal rho;
-   LBMReal shearRate;
+   real rho;
+   real shearRate;
 
-   LBMReal omegaMin;
+   real omegaMin;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff);
-   void calcMoments(const LBMReal* const f, LBMReal omegaInf, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3,
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets_intern(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new, LBMReal x, LBMReal y, LBMReal z, LBMReal xs, LBMReal ys, LBMReal zs);
-   void calcInterpolatedNode(LBMReal* f, /*LBMReal omega,*/ LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega);
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3);
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz);
+   void setOffsets(real xoff, real yoff, real zoff);
+   void calcMoments(const real* const f, real omegaInf, real& rho, real& vx1, real& vx2, real& vx3,
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets_intern(const D3Q27ICell& icell, real omega, real eps_new, real x, real y, real z, real xs, real ys, real zs);
+   void calcInterpolatedNode(real* f, /*real omega,*/ real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega);
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3);
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz);
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -99,7 +99,7 @@ inline void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell&
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp
index 94fbad358b16d923ddc5425e6476ff3892bbbf3c..81dfc86d12daa48d9b238097e4e82f907b64abfb 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp
@@ -43,7 +43,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 RheologyK17LBMKernel::RheologyK17LBMKernel()
@@ -85,7 +86,7 @@ SPtr<LBMKernel> RheologyK17LBMKernel::clone()
    } 
    else
    {
-      OxxPyyPzz = one;
+      OxxPyyPzz = c1o1;
    }
 
    dynamicPointerCast<RheologyK17LBMKernel>(kernel)->OxxPyyPzz = this->OxxPyyPzz;
@@ -190,63 +191,63 @@ void RheologyK17LBMKernel::calculate(int step)
                // a b c
                //-1 0 1
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-               ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+               ////////////////////////////////////////////////////////////////////////////////////
+               real drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
                   (((mfbac+mfbca)+(mfbaa+mfbcc))+((mfabc+mfcba)+(mfaba+mfcbc))+((mfacb+mfcab)+(mfaab+mfccb)))+
                   ((mfabb+mfcbb)+(mfbab+mfbcb))+(mfbba+mfbbc))+mfbbb;
 
-               LBMReal rho = one+drho;
+               real rho = c1o1+drho;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
+               real vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
                   (((mfcba-mfabc)+(mfcbc-mfaba))+((mfcab-mfacb)+(mfccb-mfaab)))+
                   (mfcbb-mfabb))/rho;
-               LBMReal vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
+               real vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
                   (((mfbca-mfbac)+(mfbcc-mfbaa))+((mfacb-mfcab)+(mfccb-mfaab)))+
                   (mfbcb-mfbab))/rho;
-               LBMReal vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
+               real vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
                   (((mfbac-mfbca)+(mfbcc-mfbaa))+((mfabc-mfcba)+(mfcbc-mfaba)))+
                   (mfbbc-mfbba))/rho;
                ////////////////////////////////////////////////////////////////////////////////////
 
-               LBMReal omega = collFactor;
+               real omega = collFactor;
 
                //forcing 
                ///////////////////////////////////////////////////////////////////////////////////////////
                if (withForcing)
                {
-                  muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                  muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                  muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                  muX1 = static_cast<real>(x1-1+ix1*maxX1);
+                  muX2 = static_cast<real>(x2-1+ix2*maxX2);
+                  muX3 = static_cast<real>(x3-1+ix3*maxX3);
 
                   forcingX1 = muForcingX1.Eval();
                   forcingX2 = muForcingX2.Eval();
@@ -258,20 +259,20 @@ void RheologyK17LBMKernel::calculate(int step)
                }
                ///////////////////////////////////////////////////////////////////////////////////////////               
          ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal oMdrho = one; // comp special
+               real oMdrho = c1o1; // comp special
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal m0, m1, m2;
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real m0, m1, m2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx*vvx;
                vy2 = vvy*vvy;
                vz2 = vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimitP = 0.01;// * 0.0001f;
-               LBMReal qudricLimitM = 0.01;// * 0.0001f;
-               LBMReal qudricLimitD = 0.01;// * 0.001f;
+               real wadjust;
+               real qudricLimitP = 0.01;// * 0.0001f;
+               real qudricLimitM = 0.01;// * 0.0001f;
+               real qudricLimitD = 0.01;// * 0.001f;
                //LBMReal s9 = minusomega;
                //test
                //s9 = 0.;
@@ -287,7 +288,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o36 * oMdrho;
                mfaab = m1-m0 * vvz;
-               mfaac = m2-two*	m1 * vvz+vz2 * m0;
+               mfaac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfabc;
                m1 = mfabc-mfaba;
@@ -295,7 +296,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaba = m0;
                m0 += c1o9 * oMdrho;
                mfabb = m1-m0 * vvz;
-               mfabc = m2-two*	m1 * vvz+vz2 * m0;
+               mfabc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfacc;
                m1 = mfacc-mfaca;
@@ -303,7 +304,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o36 * oMdrho;
                mfacb = m1-m0 * vvz;
-               mfacc = m2-two*	m1 * vvz+vz2 * m0;
+               mfacc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbac;
@@ -312,7 +313,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c1o9 * oMdrho;
                mfbab = m1-m0 * vvz;
-               mfbac = m2-two*	m1 * vvz+vz2 * m0;
+               mfbac = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbba+mfbbc;
                m1 = mfbbc-mfbba;
@@ -320,7 +321,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbba = m0;
                m0 += c4o9 * oMdrho;
                mfbbb = m1-m0 * vvz;
-               mfbbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbbc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbca+mfbcc;
                m1 = mfbcc-mfbca;
@@ -328,7 +329,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbca = m0;
                m0 += c1o9 * oMdrho;
                mfbcb = m1-m0 * vvz;
-               mfbcc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbcc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcac;
@@ -337,7 +338,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o36 * oMdrho;
                mfcab = m1-m0 * vvz;
-               mfcac = m2-two*	m1 * vvz+vz2 * m0;
+               mfcac = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcba+mfcbc;
                m1 = mfcbc-mfcba;
@@ -345,7 +346,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcba = m0;
                m0 += c1o9 * oMdrho;
                mfcbb = m1-m0 * vvz;
-               mfcbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfcbc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcca+mfccc;
                m1 = mfccc-mfcca;
@@ -353,7 +354,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcca = m0;
                m0 += c1o36 * oMdrho;
                mfccb = m1-m0 * vvz;
-               mfccc = m2-two*	m1 * vvz+vz2 * m0;
+               mfccc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -365,14 +366,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o6 * oMdrho;
                mfaba = m1-m0 * vvy;
-               mfaca = m2-two*	m1 * vvy+vy2 * m0;
+               mfaca = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfacb;
                m1 = mfacb-mfaab;
                m0 = m2+mfabb;
                mfaab = m0;
                mfabb = m1-m0 * vvy;
-               mfacb = m2-two*	m1 * vvy+vy2 * m0;
+               mfacb = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfacc;
                m1 = mfacc-mfaac;
@@ -380,7 +381,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o18 * oMdrho;
                mfabc = m1-m0 * vvy;
-               mfacc = m2-two*	m1 * vvy+vy2 * m0;
+               mfacc = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbca;
@@ -389,14 +390,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c2o3 * oMdrho;
                mfbba = m1-m0 * vvy;
-               mfbca = m2-two*	m1 * vvy+vy2 * m0;
+               mfbca = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbab+mfbcb;
                m1 = mfbcb-mfbab;
                m0 = m2+mfbbb;
                mfbab = m0;
                mfbbb = m1-m0 * vvy;
-               mfbcb = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcb = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbac+mfbcc;
                m1 = mfbcc-mfbac;
@@ -404,7 +405,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbac = m0;
                m0 += c2o9 * oMdrho;
                mfbbc = m1-m0 * vvy;
-               mfbcc = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcc = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcca;
@@ -413,14 +414,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o6 * oMdrho;
                mfcba = m1-m0 * vvy;
-               mfcca = m2-two*	m1 * vvy+vy2 * m0;
+               mfcca = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcab+mfccb;
                m1 = mfccb-mfcab;
                m0 = m2+mfcbb;
                mfcab = m0;
                mfcbb = m1-m0 * vvy;
-               mfccb = m2-two*	m1 * vvy+vy2 * m0;
+               mfccb = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcac+mfccc;
                m1 = mfccc-mfcac;
@@ -428,7 +429,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcac = m0;
                m0 += c1o18 * oMdrho;
                mfcbc = m1-m0 * vvy;
-               mfccc = m2-two*	m1 * vvy+vy2 * m0;
+               mfccc = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -438,16 +439,16 @@ void RheologyK17LBMKernel::calculate(int step)
                m1 = mfcaa-mfaaa;
                m0 = m2+mfbaa;
                mfaaa = m0;
-               m0 += one* oMdrho;
+               m0 += c1o1 * oMdrho;
                mfbaa = m1-m0 * vvx;
-               mfcaa = m2-two*	m1 * vvx+vx2 * m0;
+               mfcaa = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfcba;
                m1 = mfcba-mfaba;
                m0 = m2+mfbba;
                mfaba = m0;
                mfbba = m1-m0 * vvx;
-               mfcba = m2-two*	m1 * vvx+vx2 * m0;
+               mfcba = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfcca;
                m1 = mfcca-mfaca;
@@ -455,7 +456,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o3 * oMdrho;
                mfbca = m1-m0 * vvx;
-               mfcca = m2-two*	m1 * vvx+vx2 * m0;
+               mfcca = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfcab;
@@ -463,21 +464,21 @@ void RheologyK17LBMKernel::calculate(int step)
                m0 = m2+mfbab;
                mfaab = m0;
                mfbab = m1-m0 * vvx;
-               mfcab = m2-two*	m1 * vvx+vx2 * m0;
+               mfcab = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabb+mfcbb;
                m1 = mfcbb-mfabb;
                m0 = m2+mfbbb;
                mfabb = m0;
                mfbbb = m1-m0 * vvx;
-               mfcbb = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbb = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacb+mfccb;
                m1 = mfccb-mfacb;
                m0 = m2+mfbcb;
                mfacb = m0;
                mfbcb = m1-m0 * vvx;
-               mfccb = m2-two*	m1 * vvx+vx2 * m0;
+               mfccb = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfcac;
@@ -486,14 +487,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o3 * oMdrho;
                mfbac = m1-m0 * vvx;
-               mfcac = m2-two*	m1 * vvx+vx2 * m0;
+               mfcac = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabc+mfcbc;
                m1 = mfcbc-mfabc;
                m0 = m2+mfbbc;
                mfabc = m0;
                mfbbc = m1-m0 * vvx;
-               mfcbc = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacc+mfccc;
                m1 = mfccc-mfacc;
@@ -501,7 +502,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfacc = m0;
                m0 += c1o9 * oMdrho;
                mfbcc = m1-m0 * vvx;
-               mfccc = m2-two*	m1 * vvx+vx2 * m0;
+               mfccc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
 
@@ -545,47 +546,47 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////
                //4.
                //////////////////////////////
-               LBMReal O4 = one;
+               real O4 = c1o1;
                //////////////////////////////
                //LBMReal O4        = omega;//TRT
                ////////////////////////////////////////////////////////////
                //5.
                //////////////////////////////
-               LBMReal O5 = one;
+               real O5 = c1o1;
                ////////////////////////////////////////////////////////////
                //6.
                //////////////////////////////
-               LBMReal O6 = one;
+               real O6 = c1o1;
                ////////////////////////////////////////////////////////////
 
 
                //central moments to cumulants
                //4.
-               LBMReal CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
-               LBMReal CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
-               LBMReal CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+ c2o1 * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
+               real CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+ c2o1 * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+ c2o1 * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
 
-               LBMReal CUMcca = mfcca-(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
-               LBMReal CUMcac = mfcac-(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
-               LBMReal CUMacc = mfacc-(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcca = mfcca-(((mfcaa * mfaca+ c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcac = mfcac-(((mfcaa * mfaac+ c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
+               real CUMacc = mfacc-(((mfaac * mfaca+ c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
 
                //5.
-               LBMReal CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               LBMReal CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               LBMReal CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               real CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+ c4o1 * mfabb * mfbbb+ c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               real CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+ c4o1 * mfbab * mfbbb+ c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               real CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+ c4o1 * mfbba * mfbbb+ c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               LBMReal CUMccc = mfccc+((-four *  mfbbb * mfbbb
+               real CUMccc = mfccc+((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -595,9 +596,9 @@ void RheologyK17LBMKernel::calculate(int step)
 
    //2.
    // linear combinations
-               LBMReal mxxPyyPzz = mfcaa+mfaca+mfaac;
-               LBMReal mxxMyy = mfcaa-mfaca;
-               LBMReal mxxMzz = mfcaa-mfaac;
+               real mxxPyyPzz = mfcaa+mfaca+mfaac;
+               real mxxMyy = mfcaa-mfaca;
+               real mxxMzz = mfcaa-mfaac;
 
                //////////////////////////////////////////////////////////////////////////
       // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -637,17 +638,17 @@ void RheologyK17LBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
 
-               LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz);// +c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-               LBMReal dyuy = dxux+omega * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux+omega * c3o2 * mxxMzz;
+               real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz);// +c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+               real dyuy = dxux+omega * c3o2 * mxxMyy;
+               real dzuz = dxux+omega * c3o2 * mxxMzz;
 
-               LBMReal Dxy =-three*omega*mfbba;
-               LBMReal Dxz =-three*omega*mfbab;
-               LBMReal Dyz =-three*omega*mfabb;
+               real Dxy =-c3o1 *omega*mfbba;
+               real Dxz =-c3o1 *omega*mfbab;
+               real Dyz =-c3o1 *omega*mfabb;
 
                ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                //non Newtonian fluid collision factor
-               LBMReal shearRate = sqrt(c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (drho + c1);
+               real shearRate = sqrt(c2o1 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (drho + c1o1);
                omega = getRheologyCollFactor(omega, shearRate, rho);
                //omega = Rheology::getHerschelBulkleyCollFactor(omega, shearRate, drho);
                //omega = Rheology::getBinghamCollFactor(omega, shearRate, drho);
@@ -657,39 +658,39 @@ void RheologyK17LBMKernel::calculate(int step)
                dyuy = dxux + omega * c3o2 * mxxMyy;
                dzuz = dxux + omega * c3o2 * mxxMzz;
 
-               Dxy = -three * omega * mfbba;
-               Dxz = -three * omega * mfbab;
-               Dyz = -three * omega * mfabb;
+               Dxy = -c3o1 * omega * mfbba;
+               Dxz = -c3o1 * omega * mfbab;
+               Dyz = -c3o1 * omega * mfabb;
 
-               mxxMyy += omega * (-mxxMyy) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-               mxxMzz += omega * (-mxxMzz) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+               mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+               mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
                mfabb += omega * (-mfabb);
                mfbab += omega * (-mfbab);
                mfbba += omega * (-mfbba);
 
-               if(omega < c1) { omega = c1; } //arbitrary limit (24.09.2020)
+               if(omega < c1o1) { omega = c1o1; } //arbitrary limit (24.09.2020)
 
-               //omega = collFactor;
+               //omega = collFactor;
 
                //magic parameter for rheology
-               LBMReal a = 10;
-               OxxPyyPzz = c1 / (a * ((c1 / omega) - c1o2) + c1o2);
-               OxxPyyPzz = (OxxPyyPzz > c1) ? c1 : OxxPyyPzz;
+               real a = 10;
+               OxxPyyPzz = c1o1 / (a * ((c1o1 / omega) - c1o2) + c1o2);
+               OxxPyyPzz = (OxxPyyPzz > c1o1) ? c1o1 : OxxPyyPzz;
 
-               LBMReal OxyyPxzz = 8.0 * (omega - 2.0) * (OxxPyyPzz * (3.0 * omega - 1.0) - 5.0 * omega) / (8.0 * (5.0 - 2.0 * omega) * omega + OxxPyyPzz * (8.0 + omega * (9.0 * omega - 26.0)));
-               LBMReal OxyyMxzz = 8.0 * (omega - 2.0) * (omega + OxxPyyPzz * (3.0 * omega - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * omega + 9.0 * omega * omega) - 8.0 * omega);
-               LBMReal Oxyz = 24.0 * (omega - 2.0) * (4.0 * omega * omega + omega * OxxPyyPzz * (18.0 - 13.0 * omega) + OxxPyyPzz * OxxPyyPzz * (2.0 + omega * (6.0 * omega - 11.0))) / (16.0 * omega * omega * (omega - 6.0) - 2.0 * omega * OxxPyyPzz * (216.0 + 5.0 * omega * (9.0 * omega - 46.0)) + OxxPyyPzz * OxxPyyPzz * (omega * (3.0 * omega - 10.0) * (15.0 * omega - 28.0) - 48.0));
+               real OxyyPxzz = 8.0 * (omega - 2.0) * (OxxPyyPzz * (3.0 * omega - 1.0) - 5.0 * omega) / (8.0 * (5.0 - 2.0 * omega) * omega + OxxPyyPzz * (8.0 + omega * (9.0 * omega - 26.0)));
+               real OxyyMxzz = 8.0 * (omega - 2.0) * (omega + OxxPyyPzz * (3.0 * omega - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * omega + 9.0 * omega * omega) - 8.0 * omega);
+               real Oxyz = 24.0 * (omega - 2.0) * (4.0 * omega * omega + omega * OxxPyyPzz * (18.0 - 13.0 * omega) + OxxPyyPzz * OxxPyyPzz * (2.0 + omega * (6.0 * omega - 11.0))) / (16.0 * omega * omega * (omega - 6.0) - 2.0 * omega * OxxPyyPzz * (216.0 + 5.0 * omega * (9.0 * omega - 46.0)) + OxxPyyPzz * OxxPyyPzz * (omega * (3.0 * omega - 10.0) * (15.0 * omega - 28.0) - 48.0));
 
-               LBMReal A_ = (4.0 * omega * omega + 2.0 * omega * OxxPyyPzz * (omega - 6.0) + OxxPyyPzz * OxxPyyPzz * (omega * (10.0 - 3.0 * omega) - 4.0)) / ((omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
-               LBMReal B_ = (4.0 * omega * OxxPyyPzz * (9.0 * omega - 16.0) - 4.0 * omega * omega - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * omega * (omega - 2.0))) / (3.0 * (omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
+               real A_ = (4.0 * omega * omega + 2.0 * omega * OxxPyyPzz * (omega - 6.0) + OxxPyyPzz * OxxPyyPzz * (omega * (10.0 - 3.0 * omega) - 4.0)) / ((omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
+               real B_ = (4.0 * omega * OxxPyyPzz * (9.0 * omega - 16.0) - 4.0 * omega * omega - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * omega * (omega - 2.0))) / (3.0 * (omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
 
 
                //relax
 
                //wadjust = OxxPyyPzz+(one-OxxPyyPzz)*fabs((mfaaa-mxxPyyPzz))/(fabs((mfaaa-mxxPyyPzz))+qudricLimitD);
                //mxxPyyPzz += wadjust*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);
-               mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - three * (one - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+               mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
 
               // mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
                //mxxMyy += omega * (-mxxMyy)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
@@ -723,37 +724,37 @@ void RheologyK17LBMKernel::calculate(int step)
 
                // linear combinations back
                mfcaa = c1o3 * (mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaca = c1o3 * (-two*  mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaac = c1o3 * (mxxMyy-two* mxxMzz+mxxPyyPzz);
+               mfaca = c1o3 * (-c2o1 *  mxxMyy+mxxMzz+mxxPyyPzz);
+               mfaac = c1o3 * (mxxMyy- c2o1 * mxxMzz+mxxPyyPzz);
 
                //3.
                // linear combinations
 
-               LBMReal mxxyPyzz = mfcba+mfabc;
-               LBMReal mxxyMyzz = mfcba-mfabc;
+               real mxxyPyzz = mfcba+mfabc;
+               real mxxyMyzz = mfcba-mfabc;
 
-               LBMReal mxxzPyyz = mfcab+mfacb;
-               LBMReal mxxzMyyz = mfcab-mfacb;
+               real mxxzPyyz = mfcab+mfacb;
+               real mxxzMyyz = mfcab-mfacb;
 
-               LBMReal mxyyPxzz = mfbca+mfbac;
-               LBMReal mxyyMxzz = mfbca-mfbac;
+               real mxyyPxzz = mfbca+mfbac;
+               real mxyyMxzz = mfbca-mfbac;
 
                //relax
                //////////////////////////////////////////////////////////////////////////
                //das ist der limiter
-               wadjust = Oxyz+(one-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
+               wadjust = Oxyz+(c1o1-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
                mfbbb += wadjust * (-mfbbb);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
                mxxyPyzz += wadjust * (-mxxyPyzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
                mxxyMyzz += wadjust * (-mxxyMyzz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
                mxxzPyyz += wadjust * (-mxxzPyyz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
                mxxzMyyz += wadjust * (-mxxzMyyz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
                mxyyPxzz += wadjust * (-mxyyPxzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
                mxyyMxzz += wadjust * (-mxyyMxzz);
                //////////////////////////////////////////////////////////////////////////
                //ohne limiter
@@ -804,12 +805,12 @@ void RheologyK17LBMKernel::calculate(int step)
                //CUMbbc += O4 * (-CUMbbc);
                //CUMbcb += O4 * (-CUMbcb);
                //CUMcbb += O4 * (-CUMcbb);
-               CUMacc = -O4*(one / omega - c1o2) * (dyuy + dzuz) * c2o3 * A_ + (one - O4) * (CUMacc);
-               CUMcac = -O4*(one / omega - c1o2) * (dxux + dzuz) * c2o3 * A_ + (one - O4) * (CUMcac);
-               CUMcca = -O4*(one / omega - c1o2) * (dyuy + dxux) * c2o3 * A_ + (one - O4) * (CUMcca);
-               CUMbbc = -O4*(one / omega - c1o2) * Dxy           * c1o3 * B_ + (one - O4) * (CUMbbc);
-               CUMbcb = -O4*(one / omega - c1o2) * Dxz           * c1o3 * B_ + (one - O4) * (CUMbcb);
-               CUMcbb = -O4*(one / omega - c1o2) * Dyz           * c1o3 * B_ + (one - O4) * (CUMcbb);
+               CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A_ + (c1o1 - O4) * (CUMacc);
+               CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A_ + (c1o1 - O4) * (CUMcac);
+               CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A_ + (c1o1 - O4) * (CUMcca);
+               CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy           * c1o3 * B_ + (c1o1 - O4) * (CUMbbc);
+               CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz           * c1o3 * B_ + (c1o1 - O4) * (CUMbcb);
+               CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz           * c1o3 * B_ + (c1o1 - O4) * (CUMcbb);
                //////////////////////////////////////////////////////////////////////////
 
 
@@ -825,31 +826,31 @@ void RheologyK17LBMKernel::calculate(int step)
 
                //back cumulants to central moments
                //4.
-               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;
-               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho;
-               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho;
+               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+ c2o1 * mfbba * mfbab)/rho;
+               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+ c2o1 * mfbba * mfabb)/rho;
+               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+ c2o1 * mfbab * mfabb)/rho;
 
-               mfcca = CUMcca+(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfcac = CUMcac+(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfacc = CUMacc+(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcca = CUMcca+(((mfcaa * mfaca+ c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcac = CUMcac+(((mfcaa * mfaac+ c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfacc = CUMacc+(((mfaac * mfaca+ c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
 
                //5.
-               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+ c4o1 * mfabb * mfbbb+ c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+ c4o1 * mfbab * mfbbb+ c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+ c4o1 * mfbba * mfbbb+ c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               mfccc = CUMccc-((-four *  mfbbb * mfbbb
+               mfccc = CUMccc-((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                ////////////////////////////////////////////////////////////////////////////////////
@@ -865,22 +866,22 @@ void RheologyK17LBMKernel::calculate(int step)
          //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
          ////////////////////////////////////////////////////////////////////////////////////
          // Z - Dir
-               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+one* oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfaac-two* mfaab *  vvz+mfaaa                * (one-vz2)-one* oMdrho * vz2;
-               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+one* oMdrho) * (vz2+vvz) * c1o2;
+               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2-vvz) * c1o2;
+               m1 = -mfaac- c2o1 * mfaab *  vvz+mfaaa                * (c1o1 -vz2)- c1o1 * oMdrho * vz2;
+               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2+vvz) * c1o2;
                mfaaa = m0;
                mfaab = m1;
                mfaac = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfabc * c1o2+mfabb * (vvz-c1o2)+mfaba * (vz2-vvz) * c1o2;
-               m1 = -mfabc-two* mfabb *  vvz+mfaba * (one-vz2);
+               m1 = -mfabc- c2o1 * mfabb *  vvz+mfaba * (c1o1 -vz2);
                m2 = mfabc * c1o2+mfabb * (vvz+c1o2)+mfaba * (vz2+vvz) * c1o2;
                mfaba = m0;
                mfabb = m1;
                mfabc = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfacb * (vvz-c1o2)+(mfaca+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfacc-two* mfacb *  vvz+mfaca                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfacc- c2o1 * mfacb *  vvz+mfaca                  * (c1o1 -vz2)-c1o3 * oMdrho * vz2;
                m2 = mfacc * c1o2+mfacb * (vvz+c1o2)+(mfaca+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfaca = m0;
                mfacb = m1;
@@ -888,21 +889,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbac * c1o2+mfbab * (vvz-c1o2)+mfbaa * (vz2-vvz) * c1o2;
-               m1 = -mfbac-two* mfbab *  vvz+mfbaa * (one-vz2);
+               m1 = -mfbac- c2o1 * mfbab *  vvz+mfbaa * (c1o1 -vz2);
                m2 = mfbac * c1o2+mfbab * (vvz+c1o2)+mfbaa * (vz2+vvz) * c1o2;
                mfbaa = m0;
                mfbab = m1;
                mfbac = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbbc * c1o2+mfbbb * (vvz-c1o2)+mfbba * (vz2-vvz) * c1o2;
-               m1 = -mfbbc-two* mfbbb *  vvz+mfbba * (one-vz2);
+               m1 = -mfbbc- c2o1 * mfbbb *  vvz+mfbba * (c1o1 -vz2);
                m2 = mfbbc * c1o2+mfbbb * (vvz+c1o2)+mfbba * (vz2+vvz) * c1o2;
                mfbba = m0;
                mfbbb = m1;
                mfbbc = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbcb * (vvz-c1o2)+mfbca * (vz2-vvz) * c1o2;
-               m1 = -mfbcc-two* mfbcb *  vvz+mfbca * (one-vz2);
+               m1 = -mfbcc- c2o1 * mfbcb *  vvz+mfbca * (c1o1 -vz2);
                m2 = mfbcc * c1o2+mfbcb * (vvz+c1o2)+mfbca * (vz2+vvz) * c1o2;
                mfbca = m0;
                mfbcb = m1;
@@ -910,21 +911,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfcab * (vvz-c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfcac-two* mfcab *  vvz+mfcaa                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfcac- c2o1 * mfcab *  vvz+mfcaa                  * (c1o1 -vz2)-c1o3 * oMdrho * vz2;
                m2 = mfcac * c1o2+mfcab * (vvz+c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfcaa = m0;
                mfcab = m1;
                mfcac = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfcbb * (vvz-c1o2)+mfcba * (vz2-vvz) * c1o2;
-               m1 = -mfcbc-two* mfcbb *  vvz+mfcba * (one-vz2);
+               m1 = -mfcbc- c2o1 * mfcbb *  vvz+mfcba * (c1o1 -vz2);
                m2 = mfcbc * c1o2+mfcbb * (vvz+c1o2)+mfcba * (vz2+vvz) * c1o2;
                mfcba = m0;
                mfcbb = m1;
                mfcbc = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfccb * (vvz-c1o2)+(mfcca+c1o9 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfccc-two* mfccb *  vvz+mfcca                  * (one-vz2)-c1o9 * oMdrho * vz2;
+               m1 = -mfccc- c2o1 * mfccb *  vvz+mfcca                  * (c1o1 -vz2)-c1o9 * oMdrho * vz2;
                m2 = mfccc * c1o2+mfccb * (vvz+c1o2)+(mfcca+c1o9 * oMdrho) * (vz2+vvz) * c1o2;
                mfcca = m0;
                mfccb = m1;
@@ -935,21 +936,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Y - Dir
                m0 = mfaca * c1o2+mfaba * (vvy-c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfaca-two* mfaba *  vvy+mfaaa                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfaca- c2o1 * mfaba *  vvy+mfaaa                  * (c1o1 -vy2)-c1o6 * oMdrho * vy2;
                m2 = mfaca * c1o2+mfaba * (vvy+c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaaa = m0;
                mfaba = m1;
                mfaca = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacb * c1o2+mfabb * (vvy-c1o2)+(mfaab+c2o3 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacb-two* mfabb *  vvy+mfaab                  * (one-vy2)-c2o3 * oMdrho * vy2;
+               m1 = -mfacb- c2o1 * mfabb *  vvy+mfaab                  * (c1o1 -vy2)-c2o3 * oMdrho * vy2;
                m2 = mfacb * c1o2+mfabb * (vvy+c1o2)+(mfaab+c2o3 * oMdrho) * (vy2+vvy) * c1o2;
                mfaab = m0;
                mfabb = m1;
                mfacb = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfabc * (vvy-c1o2)+(mfaac+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacc-two* mfabc *  vvy+mfaac                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfacc- c2o1 * mfabc *  vvy+mfaac                  * (c1o1 -vy2)-c1o6 * oMdrho * vy2;
                m2 = mfacc * c1o2+mfabc * (vvy+c1o2)+(mfaac+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaac = m0;
                mfabc = m1;
@@ -957,21 +958,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbca * c1o2+mfbba * (vvy-c1o2)+mfbaa * (vy2-vvy) * c1o2;
-               m1 = -mfbca-two* mfbba *  vvy+mfbaa * (one-vy2);
+               m1 = -mfbca- c2o1 * mfbba *  vvy+mfbaa * (c1o1 -vy2);
                m2 = mfbca * c1o2+mfbba * (vvy+c1o2)+mfbaa * (vy2+vvy) * c1o2;
                mfbaa = m0;
                mfbba = m1;
                mfbca = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcb * c1o2+mfbbb * (vvy-c1o2)+mfbab * (vy2-vvy) * c1o2;
-               m1 = -mfbcb-two* mfbbb *  vvy+mfbab * (one-vy2);
+               m1 = -mfbcb- c2o1 * mfbbb *  vvy+mfbab * (c1o1 -vy2);
                m2 = mfbcb * c1o2+mfbbb * (vvy+c1o2)+mfbab * (vy2+vvy) * c1o2;
                mfbab = m0;
                mfbbb = m1;
                mfbcb = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbbc * (vvy-c1o2)+mfbac * (vy2-vvy) * c1o2;
-               m1 = -mfbcc-two* mfbbc *  vvy+mfbac * (one-vy2);
+               m1 = -mfbcc- c2o1 * mfbbc *  vvy+mfbac * (c1o1 -vy2);
                m2 = mfbcc * c1o2+mfbbc * (vvy+c1o2)+mfbac * (vy2+vvy) * c1o2;
                mfbac = m0;
                mfbbc = m1;
@@ -979,21 +980,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfcba * (vvy-c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfcca-two* mfcba *  vvy+mfcaa                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfcca- c2o1 * mfcba *  vvy+mfcaa                   * (c1o1 -vy2)-c1o18 * oMdrho * vy2;
                m2 = mfcca * c1o2+mfcba * (vvy+c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcaa = m0;
                mfcba = m1;
                mfcca = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfcbb * (vvy-c1o2)+(mfcab+c2o9 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccb-two* mfcbb *  vvy+mfcab                  * (one-vy2)-c2o9 * oMdrho * vy2;
+               m1 = -mfccb- c2o1 * mfcbb *  vvy+mfcab                  * (c1o1 -vy2)-c2o9 * oMdrho * vy2;
                m2 = mfccb * c1o2+mfcbb * (vvy+c1o2)+(mfcab+c2o9 * oMdrho) * (vy2+vvy) * c1o2;
                mfcab = m0;
                mfcbb = m1;
                mfccb = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfcbc * (vvy-c1o2)+(mfcac+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccc-two* mfcbc *  vvy+mfcac                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfccc- c2o1 * mfcbc *  vvy+mfcac                   * (c1o1 -vy2)-c1o18 * oMdrho * vy2;
                m2 = mfccc * c1o2+mfcbc * (vvy+c1o2)+(mfcac+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcac = m0;
                mfcbc = m1;
@@ -1004,21 +1005,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // X - Dir
                m0 = mfcaa * c1o2+mfbaa * (vvx-c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcaa-two* mfbaa *  vvx+mfaaa                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcaa- c2o1 * mfbaa *  vvx+mfaaa                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcaa * c1o2+mfbaa * (vvx+c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaaa = m0;
                mfbaa = m1;
                mfcaa = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcba * c1o2+mfbba * (vvx-c1o2)+(mfaba+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcba-two* mfbba *  vvx+mfaba                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcba- c2o1 * mfbba *  vvx+mfaba                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcba * c1o2+mfbba * (vvx+c1o2)+(mfaba+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaba = m0;
                mfbba = m1;
                mfcba = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfbca * (vvx-c1o2)+(mfaca+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcca-two* mfbca *  vvx+mfaca                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcca- c2o1 * mfbca *  vvx+mfaca                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcca * c1o2+mfbca * (vvx+c1o2)+(mfaca+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaca = m0;
                mfbca = m1;
@@ -1026,21 +1027,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcab * c1o2+mfbab * (vvx-c1o2)+(mfaab+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcab-two* mfbab *  vvx+mfaab                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcab- c2o1 * mfbab *  vvx+mfaab                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcab * c1o2+mfbab * (vvx+c1o2)+(mfaab+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaab = m0;
                mfbab = m1;
                mfcab = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfcbb * c1o2+mfbbb * (vvx-c1o2)+(mfabb+c4o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbb-two* mfbbb *  vvx+mfabb                  * (one-vx2)-c4o9 * oMdrho * vx2;
+               m1 = -mfcbb- c2o1 * mfbbb *  vvx+mfabb                  * (c1o1 -vx2)-c4o9 * oMdrho * vx2;
                m2 = mfcbb * c1o2+mfbbb * (vvx+c1o2)+(mfabb+c4o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabb = m0;
                mfbbb = m1;
                mfcbb = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfbcb * (vvx-c1o2)+(mfacb+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccb-two* mfbcb *  vvx+mfacb                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfccb- c2o1 * mfbcb *  vvx+mfacb                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfccb * c1o2+mfbcb * (vvx+c1o2)+(mfacb+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfacb = m0;
                mfbcb = m1;
@@ -1048,21 +1049,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfbac * (vvx-c1o2)+(mfaac+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcac-two* mfbac *  vvx+mfaac                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcac- c2o1 * mfbac *  vvx+mfaac                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcac * c1o2+mfbac * (vvx+c1o2)+(mfaac+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaac = m0;
                mfbac = m1;
                mfcac = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfbbc * (vvx-c1o2)+(mfabc+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbc-two* mfbbc *  vvx+mfabc                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcbc- c2o1 * mfbbc *  vvx+mfabc                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcbc * c1o2+mfbbc * (vvx+c1o2)+(mfabc+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabc = m0;
                mfbbc = m1;
                mfcbc = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfbcc * (vvx-c1o2)+(mfacc+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccc-two* mfbcc *  vvx+mfacc                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfccc- c2o1 * mfbcc *  vvx+mfacc                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfccc * c1o2+mfbcc * (vvx+c1o2)+(mfacc+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfacc = m0;
                mfbcc = m1;
@@ -1073,11 +1074,11 @@ void RheologyK17LBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
                //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = drho - drho_post;
+               real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1131,13 +1132,13 @@ void RheologyK17LBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double RheologyK17LBMKernel::getCalculationTime()
+real RheologyK17LBMKernel::getCalculationTime() // returns timer.getTotalTime(); this patch changes the declared return type from double to real
 {
    //return timer.getDuration();
    return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyK17LBMKernel::setBulkViscosity(LBMReal value)
+void RheologyK17LBMKernel::setBulkViscosity(real value) // plain setter: copies value into the bulkViscosity member and does nothing else
 {
    bulkViscosity = value;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h
index 77bb5ce84557c87210932f8fd76b6ae846414545..80bf7dcb346f500d75a31eca68fefdd092f2e9e8 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h
@@ -54,35 +54,35 @@ public:
    virtual ~RheologyK17LBMKernel(void);
    virtual void calculate(int step) override;
    virtual SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    //! The value should not be equal to a shear viscosity
-   void setBulkViscosity(LBMReal value);
+   void setBulkViscosity(real value);
 protected:
    virtual void initDataSet();
 
-   virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const
+   virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const // this version only throws via UB_THROW; per its message the real implementation belongs in the derived class
    {
-       UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class"));
+       UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class"));
    }
 
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
    
    // bulk viscosity
-   LBMReal OxxPyyPzz; //omega2 (bulk viscosity)
-   LBMReal bulkViscosity;
+   real OxxPyyPzz; //omega2 (bulk viscosity)
+   real bulkViscosity;
 
 };
 #endif // RheologyK17LBMKernel_h__
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp
index f399a00065d3cfd2be13f4184057f8223de38573..2b65887c1b0461a836f29a2aff7dc809dbb733e8 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp
@@ -41,7 +41,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 RheologyModelLBMKernel::RheologyModelLBMKernel() : forcingX1(0), forcingX2(0), forcingX3(0)
 {
@@ -130,61 +131,61 @@ void RheologyModelLBMKernel::calculate(int step)
 						// a b c
 						//-1 0 1
 
-						LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-						LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-						LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-						LBMReal m0, m1, m2;
-
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+						real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+						real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+						real m0, m1, m2;
+
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 
-						LBMReal collFactorF = collFactor;
+						real collFactorF = collFactor;
 
 						//forcing 
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -195,7 +196,7 @@ void RheologyModelLBMKernel::calculate(int step)
 							vvz += forcingX3 * deltaT * 0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -223,15 +224,15 @@ void RheologyModelLBMKernel::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx * vvx;
 						vy2 = vvy * vvy;
 						vz2 = vvz * vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -462,33 +463,33 @@ void RheologyModelLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
 											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -502,20 +503,20 @@ void RheologyModelLBMKernel::calculate(int step)
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
-						LBMReal Dxy = -three * collFactorF * mfbba;
-						LBMReal Dxz = -three * collFactorF * mfbab;
-						LBMReal Dyz = -three * collFactorF * mfabb;
+						real Dxy = -c3o1 * collFactorF * mfbba;
+						real Dxz = -c3o1 * collFactorF * mfbab;
+						real Dyz = -c3o1 * collFactorF * mfabb;
 						////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 						//non Newtonian fluid collision factor
-						LBMReal shearRate = sqrt(c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
+						real shearRate = sqrt(c2o1 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + c1o1);
 						collFactorF = getRheologyCollFactor(collFactorF, shearRate, rho);
 						////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -535,14 +536,14 @@ void RheologyModelLBMKernel::calculate(int step)
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -837,11 +838,11 @@ void RheologyModelLBMKernel::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -913,7 +914,7 @@ void RheologyModelLBMKernel::calculate(int step)
 //	return kernel;
 //}
 
-double RheologyModelLBMKernel::getCalculationTime()
+real RheologyModelLBMKernel::getCalculationTime()
 {
    return timer.getTotalTime();
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h
index 0aba5a273536feab9ce892b09ae1837df4fd6a7f..8b4ce82984d12ef33b1011fe020fe7d3f0c638e4 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h
@@ -51,31 +51,31 @@ public:
 	virtual ~RheologyModelLBMKernel();
 	void calculate(int step);
 	virtual SPtr<LBMKernel> clone() { UB_THROW(UbException("SPtr<LBMKernel> clone() - belongs in the derived class")); };
-	double getCalculationTime();
+	real getCalculationTime();
 
 	void swapDistributions();
 
 protected:
 	void initDataSet();
 
-	virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+	virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const { UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class")); }
 
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
 	bool test;
 };
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp
index 2ab87c0b648b48fe56cff8380afdafb7deed08d4..308add14b3710983ea9139b405c3074891b1b7c9 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp
@@ -6,10 +6,12 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include "Rheology.h"
+#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
 using namespace UbMath;
+using namespace vf::lbm::constant;
 
 
 RheologyModelLBMKernel2::RheologyModelLBMKernel2() : forcingX1(0), forcingX2(0), forcingX3(0)
@@ -99,61 +101,61 @@ void RheologyModelLBMKernel2::calculate(int step)
 						// a b c
 						//-1 0 1
 
-						LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-						LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-						LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-						LBMReal m0, m1, m2;
-
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+						real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+						real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+						real m0, m1, m2;
+
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 
-						LBMReal collFactorF = collFactor;
+						real collFactorF = collFactor;
 
 						//forcing 
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -164,7 +166,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 							vvz += forcingX3 * deltaT * 0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -192,15 +194,15 @@ void RheologyModelLBMKernel2::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx * vvx;
 						vy2 = vvy * vvy;
 						vz2 = vvz * vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -379,7 +381,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						m1 = mfcca - mfaca;
 						m0 = m2 + mfbca;
 						mfaca = m0;
-						m0 += c1o3 * oMdrho;
+						m0 += vf::lbm::constant::c1o3 * oMdrho;
 						mfbca = m1 - m0 * vvx;
 						mfcca = m2 - 2. * m1 * vvx + vx2 * m0;
 						////////////////////////////////////////////////////////////////////////////////////
@@ -410,7 +412,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						m1 = mfcac - mfaac;
 						m0 = m2 + mfbac;
 						mfaac = m0;
-						m0 += c1o3 * oMdrho;
+						m0 += vf::lbm::constant::c1o3 * oMdrho;
 						mfbac = m1 - m0 * vvx;
 						mfcac = m2 - 2. * m1 * vvx + vx2 * m0;
 						////////////////////////////////////////////////////////////////////////////////////
@@ -431,53 +433,53 @@ void RheologyModelLBMKernel2::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
-											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
+											  //real OxyyMxzz  = 1.;//2+s9;//
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + vf::lbm::constant::c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + vf::lbm::constant::c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + vf::lbm::constant::c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + vf::lbm::constant::c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + vf::lbm::constant::c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + vf::lbm::constant::c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - vf::lbm::constant::c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - vf::lbm::constant::c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - vf::lbm::constant::c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
 							+ (4. * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
 								+ 2. * (mfcaa * mfaca * mfaac)
 								+ 16. * mfbba * mfbab * mfabb)
-							- c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
+							- vf::lbm::constant::c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
 							- c1o9 * (mfcaa + mfaca + mfaac) * oMdrho * (1. - 2. * oMdrho) - c1o27 * oMdrho * oMdrho * (-2. * oMdrho)
 							+ (2. * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
 								+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa)) * c2o3 * oMdrho) + c1o27 * oMdrho;
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
 //						LBMReal Dxy = -three * collFactorF * mfbba;
 //						LBMReal Dxz = -three * collFactorF * mfbab;
@@ -486,7 +488,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//non Newtonian fluid collision factor
 //						LBMReal shearRate = sqrt(c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
 
-						LBMReal shearFactor = sqrt(c1o2 * ((mfcaa - mfaaa * c1o3) * (mfcaa - mfaaa * c1o3) + (mfaca - mfaaa * c1o3) * (mfaca - mfaaa * c1o3) + (mfaac - mfaaa * c1o3) * (mfaac - mfaaa * c1o3)) + mfbba * mfbba + mfbab * mfbab + mfabb * mfabb) + UbMath::Epsilon<LBMReal>::val();
+						real shearFactor = sqrt(c1o2 * ((mfcaa - mfaaa * vf::lbm::constant::c1o3) * (mfcaa - mfaaa * vf::lbm::constant::c1o3) + (mfaca - mfaaa * vf::lbm::constant::c1o3) * (mfaca - mfaaa * vf::lbm::constant::c1o3) + (mfaac - mfaaa * vf::lbm::constant::c1o3) * (mfaac - mfaaa * vf::lbm::constant::c1o3)) + mfbba * mfbba + mfbab * mfbab + mfabb * mfabb) + UbMath::Epsilon<real>::val();
 
 						//collFactorF = getRheologyCollFactor(collFactorF, shearRate, rho);
 						////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -505,7 +507,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//mfbba += getRheologyCollFactor(collFactorF, std::abs(Dxy) / (rho + one), rho) * (-mfbba);
 
 						SPtr<Rheology> thix = Rheology::getInstance();
-						LBMReal tau0 = thix->getYieldStress();
+						real tau0 = thix->getYieldStress();
 
 						mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz /*+ ((mxxPyyPzz-mfaaa)/shearFactor*tau0)*/) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
 						//mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -516,7 +518,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//mfbab += collFactorF * (-mfbab + mfbab/shearFactor*tau0);
 						//mfbba += collFactorF * (-mfbba + mfbba/shearFactor*tau0);
 
-						collFactorF = collFactor * (c1 - tau0 / shearFactor);
+						collFactorF = collFactor * (c1o1 - tau0 / shearFactor);
 
 						mxxMyy += collFactorF * (-mxxMyy/* + mxxMyy / shearFactor * tau0*/) - 3. * (1. - c1o2 * collFactorF) * (vx2 * dxux - vy2 * dyuy);
 						mxxMzz += collFactorF * (-mxxMzz/* + mxxMzz / shearFactor * tau0*/) - 3. * (1. - c1o2 * collFactorF) * (vx2 * dxux - vz2 * dzuz);
@@ -527,20 +529,20 @@ void RheologyModelLBMKernel2::calculate(int step)
 
 
 						// linear combinations back
-						mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-						mfaca = c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
-						mfaac = c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
+						mfcaa = vf::lbm::constant::c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+						mfaca = vf::lbm::constant::c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
+						mfaac = vf::lbm::constant::c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -589,18 +591,18 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//mfbcb = CUMbcb + ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//mfbbc = CUMbbc + ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						mfcbb = CUMcbb + ((mfcaa + vf::lbm::constant::c1o3) * mfabb + 2. * mfbba * mfbab);
+						mfbcb = CUMbcb + ((mfaca + vf::lbm::constant::c1o3) * mfbab + 2. * mfbba * mfabb);
+						mfbbc = CUMbbc + ((mfaac + vf::lbm::constant::c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
-						mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
-						mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
+						mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + vf::lbm::constant::c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
+						mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + vf::lbm::constant::c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
+						mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + vf::lbm::constant::c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
 
 						//5.
-						mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
-						mfcbc = CUMcbc + (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc) * oMdrho;
-						mfccb = CUMccb + (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab) * oMdrho;
+						mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + vf::lbm::constant::c1o3 * (mfbca + mfbac) * oMdrho;
+						mfcbc = CUMcbc + (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) + vf::lbm::constant::c1o3 * (mfcba + mfabc) * oMdrho;
+						mfccb = CUMccb + (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) + vf::lbm::constant::c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//6.
 						mfccc = CUMccc - ((-4. * mfbbb * mfbbb
@@ -610,7 +612,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 							+ (4. * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
 								+ 2. * (mfcaa * mfaca * mfaac)
 								+ 16. * mfbba * mfbab * mfabb)
-							- c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
+							- vf::lbm::constant::c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
 							- c1o9 * (mfcaa + mfaca + mfaac) * oMdrho * (1. - 2. * oMdrho) - c1o27 * oMdrho * oMdrho * (-2. * oMdrho)
 							+ (2. * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
 								+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa)) * c2o3 * oMdrho) - c1o27 * oMdrho;
@@ -642,9 +644,9 @@ void RheologyModelLBMKernel2::calculate(int step)
 						mfabb = m1;
 						mfabc = m2;
 						////////////////////////////////////////////////////////////////////////////////////
-						m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-						m1 = -mfacc - 2. * mfacb * vvz + mfaca * (1. - vz2) - c1o3 * oMdrho * vz2;
-						m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
+						m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + vf::lbm::constant::c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
+						m1 = -mfacc - 2. * mfacb * vvz + mfaca * (1. - vz2) - vf::lbm::constant::c1o3 * oMdrho * vz2;
+						m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + vf::lbm::constant::c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
 						mfaca = m0;
 						mfacb = m1;
 						mfacc = m2;
@@ -672,9 +674,9 @@ void RheologyModelLBMKernel2::calculate(int step)
 						mfbcc = m2;
 						////////////////////////////////////////////////////////////////////////////////////
 						////////////////////////////////////////////////////////////////////////////////////
-						m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-						m1 = -mfcac - 2. * mfcab * vvz + mfcaa * (1. - vz2) - c1o3 * oMdrho * vz2;
-						m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
+						m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + vf::lbm::constant::c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
+						m1 = -mfcac - 2. * mfcab * vvz + mfcaa * (1. - vz2) - vf::lbm::constant::c1o3 * oMdrho * vz2;
+						m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + vf::lbm::constant::c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
 						mfcaa = m0;
 						mfcab = m1;
 						mfcac = m2;
@@ -835,11 +837,11 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -911,7 +913,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 //	return kernel;
 //}
 
-double RheologyModelLBMKernel2::getCalculationTime()
+real RheologyModelLBMKernel2::getCalculationTime()
 {
    return timer.getTotalTime();
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h
index c3a20a11038f74586266c8338027a6187ef2fea5..ca058f1e4e67cd8cf51e971999f9a2a359aa197d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h
@@ -19,31 +19,31 @@ public:
 	virtual ~RheologyModelLBMKernel2();
 	void calculate(int step);
 	virtual SPtr<LBMKernel> clone() { UB_THROW(UbException("SPtr<LBMKernel> clone() - belongs in the derived class")); };
-	double getCalculationTime();
+	real getCalculationTime();
 
 	void swapDistributions();
 
 protected:
 	void initDataSet();
 
-	virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+	virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const { UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class")); }
 
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
 	bool test;
 };
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h
index a68eca9cd627be7b56bcfebfd429d80c3f924aa8..acd02101fed8793175a6aae62394ce14e2847832 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h
@@ -60,7 +60,7 @@ public:
 		return kernel;
 	}
 protected:
-	LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+	real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override
 	{
 		return Rheology::getPowellEyringCollFactor(omegaInf, shearRate, drho);
 	}
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp
index a13e4fc716725156adc28841da22c4b2516dc24f..6d11842f6f2d92b4daff1f7c1bb348af0c8021ca 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath; 
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 ThixotropyExpLBMKernel::ThixotropyExpLBMKernel()
@@ -134,37 +135,37 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////
 						//////////////////////////////////////////////////////////////////////////
 
-						LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+						real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
 								  
-						LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+						real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
 								  
-						LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+						real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
-						LBMReal lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
@@ -214,19 +215,19 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal m0, m1, m2;
+						real m0, m1, m2;
 
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 						
@@ -240,9 +241,9 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1*maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2*maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3*maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1*maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2*maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3*maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -253,7 +254,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							vvz += forcingX3*deltaT*0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -281,15 +282,15 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx*vvx;
 						vy2 = vvy*vvy;
 						vz2 = vvz*vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -520,33 +521,33 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
 											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -560,19 +561,19 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
-						LBMReal Dxy =-three*collFactorF*mfbba;
-                  LBMReal Dxz =-three*collFactorF*mfbab;
-                  LBMReal Dyz =-three*collFactorF*mfabb;
+						real Dxy =-c3o1*collFactorF*mfbba;
+						real Dxz =-c3o1*collFactorF*mfbab;
+						real Dyz =-c3o1*collFactorF*mfabb;
 
-						LBMReal gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
+						real gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + c1o1);
 						//collFactorF = BinghamModel::getBinghamCollFactor(collFactorF, gammaDot, rho);
 
 						//relax
@@ -591,14 +592,14 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz)*fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -893,11 +894,11 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -959,7 +960,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 
 
-						LBMReal ux, uy, uz;
+						real ux, uy, uz;
 
 						ux = vvx;						
 						uy = vvy;
@@ -1005,7 +1006,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+						real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 							(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 							((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 					
@@ -1021,56 +1022,56 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						//collFactorF = (collFactorF < 0.5) ? 0.5 : collFactorF;
 
-						LBMReal dlambda = one / theta - alpha * lambda * gammaDot;
+						real dlambda = c1o1 / theta - alpha * lambda * gammaDot;
 
 
 						//////////////////////////////////////////////////////////////////////////
 						//collision Factorized Central moment Kernel Geier 2015
 						//////////////////////////////////////////////////////////////////////////               
-						LBMReal Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
+						real Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 							mfacb + mfacc + mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 							mfbcb + mfbcc + mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb + mfccc;
 						
 						Mom000 += dlambda*c1o2;  //1
 
 																												   //(100)//
-						LBMReal Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
+						real Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
 							mfacb*(-1 - ux) + mfacc*(-1 - ux) + mfcaa*(1 - ux) + mfcab*(1 - ux) + mfcac*(1 - ux) + mfcba*(1 - ux) + mfcbb*(1 - ux) +
 							mfcbc*(1 - ux) + mfcca*(1 - ux) + mfccb*(1 - ux) + mfccc*(1 - ux) - mfbaa*ux - mfbab*ux - mfbac*ux - mfbba*ux - mfbbb*ux -
 							mfbbc*ux - mfbca*ux - mfbcb*ux - mfbcc*ux;
 
-						LBMReal Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
+						real Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
 							mfcab*(-1 - uy) + mfcac*(-1 - uy) + mfaca*(1 - uy) + mfacb*(1 - uy) + mfacc*(1 - uy) + mfbca*(1 - uy) + mfbcb*(1 - uy) +
 							mfbcc*(1 - uy) + mfcca*(1 - uy) + mfccb*(1 - uy) + mfccc*(1 - uy) - mfaba*uy - mfabb*uy - mfabc*uy - mfbba*uy - mfbbb*uy -
 							mfbbc*uy - mfcba*uy - mfcbb*uy - mfcbc*uy;
 
-						LBMReal Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
+						real Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
 							mfcba*(-1 - uz) + mfcca*(-1 - uz) + mfaac*(1 - uz) + mfabc*(1 - uz) + mfacc*(1 - uz) + mfbac*(1 - uz) + mfbbc*(1 - uz) +
 							mfbcc*(1 - uz) + mfcac*(1 - uz) + mfcbc*(1 - uz) + mfccc*(1 - uz) - mfaab*uz - mfabb*uz - mfacb*uz - mfbab*uz - mfbbb*uz -
 							mfbcb*uz - mfcab*uz - mfcbb*uz - mfccb*uz;
 						////
 
 						//(110)//
-						LBMReal Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
+						real Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
 							mfcab*(1 - ux)*(-1 - uy) + mfcac*(1 - ux)*(-1 - uy) - mfbaa*ux*(-1 - uy) - mfbab*ux*(-1 - uy) - mfbac*ux*(-1 - uy) +
 							mfaca*(-1 - ux)*(1 - uy) + mfacb*(-1 - ux)*(1 - uy) + mfacc*(-1 - ux)*(1 - uy) + mfcca*(1 - ux)*(1 - uy) + mfccb*(1 - ux)*(1 - uy) +
 							mfccc*(1 - ux)*(1 - uy) - mfbca*ux*(1 - uy) - mfbcb*ux*(1 - uy) - mfbcc*ux*(1 - uy) - mfaba*(-1 - ux)*uy - mfabb*(-1 - ux)*uy -
 							mfabc*(-1 - ux)*uy - mfcba*(1 - ux)*uy - mfcbb*(1 - ux)*uy - mfcbc*(1 - ux)*uy + mfbba*ux*uy + mfbbb*ux*uy + mfbbc*ux*uy;
 
-						LBMReal Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
+						real Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
 							mfcba*(1 - ux)*(-1 - uz) + mfcca*(1 - ux)*(-1 - uz) - mfbaa*ux*(-1 - uz) - mfbba*ux*(-1 - uz) - mfbca*ux*(-1 - uz) +
 							mfaac*(-1 - ux)*(1 - uz) + mfabc*(-1 - ux)*(1 - uz) + mfacc*(-1 - ux)*(1 - uz) + mfcac*(1 - ux)*(1 - uz) + mfcbc*(1 - ux)*(1 - uz) +
 							mfccc*(1 - ux)*(1 - uz) - mfbac*ux*(1 - uz) - mfbbc*ux*(1 - uz) - mfbcc*ux*(1 - uz) - mfaab*(-1 - ux)*uz - mfabb*(-1 - ux)*uz -
 							mfacb*(-1 - ux)*uz - mfcab*(1 - ux)*uz - mfcbb*(1 - ux)*uz - mfccb*(1 - ux)*uz + mfbab*ux*uz + mfbbb*ux*uz + mfbcb*ux*uz;
 
-						LBMReal Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
+						real Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
 							mfbca*(1 - uy)*(-1 - uz) + mfcca*(1 - uy)*(-1 - uz) - mfaba*uy*(-1 - uz) - mfbba*uy*(-1 - uz) - mfcba*uy*(-1 - uz) +
 							mfaac*(-1 - uy)*(1 - uz) + mfbac*(-1 - uy)*(1 - uz) + mfcac*(-1 - uy)*(1 - uz) + mfacc*(1 - uy)*(1 - uz) + mfbcc*(1 - uy)*(1 - uz) +
 							mfccc*(1 - uy)*(1 - uz) - mfabc*uy*(1 - uz) - mfbbc*uy*(1 - uz) - mfcbc*uy*(1 - uz) - mfaab*(-1 - uy)*uz - mfbab*(-1 - uy)*uz -
 							mfcab*(-1 - uy)*uz - mfacb*(1 - uy)*uz - mfbcb*(1 - uy)*uz - mfccb*(1 - uy)*uz + mfabb*uy*uz + mfbbb*uy*uz + mfcbb*uy*uz;
 						////
 
-						LBMReal Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
+						real Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
 							mfaca*(-1 - ux)*(1 - uy)*(-1 - uz) + mfcca*(1 - ux)*(1 - uy)*(-1 - uz) - mfbca*ux*(1 - uy)*(-1 - uz) -
 							mfaba*(-1 - ux)*uy*(-1 - uz) - mfcba*(1 - ux)*uy*(-1 - uz) + mfbba*ux*uy*(-1 - uz) + mfaac*(-1 - ux)*(-1 - uy)*(1 - uz) +
 							mfcac*(1 - ux)*(-1 - uy)*(1 - uz) - mfbac*ux*(-1 - uy)*(1 - uz) + mfacc*(-1 - ux)*(1 - uy)*(1 - uz) +
@@ -1080,14 +1081,14 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							mfbbb*ux*uy*uz;
 
 						//(200)//
-						LBMReal Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
+						real Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
 							mfccc)*pow(-1 + ux, 2) +
 							(mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 								mfbcb + mfbcc)*pow(ux, 2) +
 								(mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 									mfacb + mfacc)*pow(1 + ux, 2)) - Mom000 / 3;
 
-						LBMReal Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
 							(mfcca + mfccb + mfccc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
@@ -1097,7 +1098,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							(mfbaa + mfbab + mfbac)*pow(1 + uy, 2) +
 							(mfcaa + mfcab + mfcac)*pow(1 + uy, 2)) - Mom000 / 3;
 
-						LBMReal Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
+						real Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
 							mfcba + mfcbc + mfcca + mfccc + mfaac*pow(-1 + uz, 2) +
 							mfbac*pow(-1 + uz, 2) + mfcac*pow(-1 + uz, 2) +
 							2 * mfaba*uz - 2 * mfabc*uz + 2 * mfaca*uz - 2 * mfacc*uz +
@@ -1115,7 +1116,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(210)//
-						LBMReal Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
+						real Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
 							(mfaba + mfabb + mfabc)*uy -
 							(mfaaa + mfaab + mfaac)*(1 + uy)) +
 							pow(ux, 2)*(-((mfbca + mfbcb + mfbcc)*(-1 + uy)) -
@@ -1125,7 +1126,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*uy -
 								(mfcaa + mfcab + mfcac)*(1 + uy))) - Mom010 / 3;
 
-						LBMReal Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
+						real Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
 							mfaac*(-1 + uz) + mfaab*uz + mfaba*uz + mfabb*uz +
 							mfabc*uz + mfaca*uz + mfacb*uz + mfacc*uz +
 							mfaaa*(1 + uz))) -
@@ -1136,7 +1137,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								mfcac*(-1 + uz) + mfcab*uz + mfcba*uz + mfcbb*uz +
 								mfcbc*uz + mfcca*uz + mfccb*uz + mfccc*uz + mfcaa*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) -
 							ux*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1147,7 +1148,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 									(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom100 / 3;
 
 
-						LBMReal Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1168,7 +1169,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 									mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 									mfcaa*pow(1 + uz, 2))) - Mom100 / 3;
 
-						LBMReal Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfacc*(-1 + uz) + mfacb*uz +
@@ -1181,7 +1182,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							pow(uy, 2)*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1203,7 +1204,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 
 						//(220)//
-						LBMReal Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) +
 							pow(ux, 2)*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1213,7 +1214,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*pow(uy, 2) +
 								(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom000 / 9;
 
-						LBMReal Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1235,7 +1236,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 								mfcaa*pow(1 + uz, 2))) - Mom000 / 9;
 
-						LBMReal Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) +
@@ -1256,7 +1257,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(221)//
-						LBMReal Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
+						real Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1275,7 +1276,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 									mfcca*(1 + uz)))) - Mom001 / 9;
 
-						LBMReal Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
+						real Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1297,7 +1298,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 									(-1 + uy)*(mfccc*pow(-1 + uz, 2) + mfccb*pow(uz, 2) +
 										mfcca*pow(1 + uz, 2)))) - Mom010 / 9;
 
-						LBMReal Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
+						real Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1320,7 +1321,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(211)//
-						LBMReal Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz)) +
 							uy*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) +
 							(-1 + uy)*(mfacc*(-1 + uz) + mfacb*uz + mfaca*(1 + uz))) +
@@ -1333,7 +1334,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								uy*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) +
 								(-1 + uy)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz)))) - Mom011 / 3;
 
-						LBMReal Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
+						real Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1352,7 +1353,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 										pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 											mfcca*(1 + uz)))) - Mom101 / 3;
 
-						LBMReal Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
+						real Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
 							mfaab*pow(uz, 2) + mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1373,7 +1374,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(222)//
-						LBMReal Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
+						real Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1400,7 +1401,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 
 
-						LBMReal Meq000 = drho+dlambda*c1o2;
+						real Meq000 = drho+dlambda*c1o2;
 
 
 						// relaxation Central Moment MRT
@@ -1821,46 +1822,46 @@ void ThixotropyExpLBMKernel::calculate(int step)
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyExpLBMKernel::getCalculationTime()
+real ThixotropyExpLBMKernel::getCalculationTime()
 {
 	//return timer.getDuration();
 	return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyExpLBMKernel::setCollisionFactorF(double collFactor)
+void ThixotropyExpLBMKernel::setCollisionFactorF(real collFactor)
 {
 	setCollisionFactor(collFactor);
 	this->collFactorF = collFactor;
 
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyExpLBMKernel::setCollisionFactorH(double collFactor)
+void ThixotropyExpLBMKernel::setCollisionFactorH(real collFactor)
 {
 	this->collFactorH = collFactor;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyExpLBMKernel::getCollisionFactorF() const
+real ThixotropyExpLBMKernel::getCollisionFactorF() const
 {
 	return this->collFactorF;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyExpLBMKernel::getCollisionFactorH() const
+real ThixotropyExpLBMKernel::getCollisionFactorH() const
 {
 	return this->collFactorH;
 }
-void ThixotropyExpLBMKernel::setAlpha(double alpha)
+void ThixotropyExpLBMKernel::setAlpha(real alpha)
 {
 	this->alpha = alpha;
 }
-double ThixotropyExpLBMKernel::getAlpha() const
+real ThixotropyExpLBMKernel::getAlpha() const
 {
 	return this->alpha;
 }
-void ThixotropyExpLBMKernel::setTheta(double theta)
+void ThixotropyExpLBMKernel::setTheta(real theta)
 {
 	this->theta = theta;
 }
-double ThixotropyExpLBMKernel::getTheta() const
+real ThixotropyExpLBMKernel::getTheta() const
 {
 	return this->theta;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h
index 3104808ad21c3976a71ab7a84c06b542932ae08d..1316493c81c56a849cdcc39a9adfb064aa684205 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h
@@ -22,50 +22,50 @@ public:
 	virtual ~ThixotropyExpLBMKernel(void);
 	virtual void calculate(int step);
 	virtual SPtr<LBMKernel> clone();
-	double getCalculationTime();
+	real getCalculationTime();
  
-	void setCollisionFactorF(double collFactor);
-   void setCollisionFactorH(double collFactor);
-   double getCollisionFactorF() const;
-   double getCollisionFactorH() const;
+	void setCollisionFactorF(real collFactor);
+   void setCollisionFactorH(real collFactor);
+   real getCollisionFactorF() const;
+   real getCollisionFactorH() const;
 
-	void setAlpha(double alpha);
-	double getAlpha() const;
+	void setAlpha(real alpha);
+	real getAlpha() const;
 
-	void setTheta(double theta);
-	double getTheta() const;
+	void setTheta(real theta);
+	real getTheta() const;
 
 	void swapDistributions();
 
 protected:
 	virtual void initDataSet();
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	Parameter parameter;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
-	LBMReal collFactorF;
-   LBMReal collFactorH;
+	real collFactorF;
+   real collFactorH;
 
-	LBMReal theta;
-	LBMReal alpha;
+	real theta;
+	real alpha;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp
index b369b45a6c7b10efb91716634443c88aa520a8cf..6fb32f31a3d40e369fb36b48e8d4f63c7a17322d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////
 ThixotropyLBMKernel::ThixotropyLBMKernel()
@@ -134,37 +135,37 @@ void ThixotropyLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////
 						//////////////////////////////////////////////////////////////////////////
 
-						LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+						real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
 								  
-						LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+						real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
 								  
-						LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+						real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
-						LBMReal lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
@@ -214,35 +215,35 @@ void ThixotropyLBMKernel::calculate(int step)
 
 						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal m0, m1, m2;
+						real m0, m1, m2;
 
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 						
 
-						LBMReal eta0 = (1/collFactor-c1o2)*c1o3;
-						LBMReal eta = (1 + lambda)* eta0;
-						collFactorF = one/(3*eta/(rho+one)+c1o2);
+						real eta0 = (1/collFactor-c1o2)*c1o3;
+						real eta = (1 + lambda)* eta0;
+						collFactorF = c1o1 /(3*eta/(rho+ c1o1)+c1o2);
 						//collFactorF = collFactor;
 
 						//forcing 
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1*maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2*maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3*maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1*maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2*maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3*maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -253,7 +254,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							vvz += forcingX3*deltaT*0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -281,15 +282,15 @@ void ThixotropyLBMKernel::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx*vvx;
 						vy2 = vvy*vvy;
 						vz2 = vvz*vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -520,33 +521,33 @@ void ThixotropyLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
-											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
+											  //real OxyyMxzz  = 1.;//2+s9;//
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -560,17 +561,17 @@ void ThixotropyLBMKernel::calculate(int step)
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
-						LBMReal Dxy =-three*collFactorF*mfbba;
-                  LBMReal Dxz =-three*collFactorF*mfbab;
-                  LBMReal Dyz =-three*collFactorF*mfabb;
+						real Dxy =-c3o1 *collFactorF*mfbba;
+						real Dxz =-c3o1*collFactorF*mfbab;
+						real Dyz =-c3o1*collFactorF*mfabb;
 
 						//relax
 						mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -588,14 +589,14 @@ void ThixotropyLBMKernel::calculate(int step)
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz)*fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -890,11 +891,11 @@ void ThixotropyLBMKernel::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -956,7 +957,7 @@ void ThixotropyLBMKernel::calculate(int step)
 
 
 
-						LBMReal ux, uy, uz;
+						real ux, uy, uz;
 
 						ux = vvx;						
 						uy = vvy;
@@ -1002,7 +1003,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+						real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 							(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 							((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 					
@@ -1012,59 +1013,59 @@ void ThixotropyLBMKernel::calculate(int step)
 						//LBMReal theta = 60 * 1.28172e+06;
 						//LBMReal alpha = 0.005;// *10.0;
 
-						LBMReal gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
+						real gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + c1o1);
 
-						LBMReal dlambda = one / theta - alpha * lambda * gammaDot;
+						real dlambda = c1o1 / theta - alpha * lambda * gammaDot;
 
 						//LBMReal dlambda = one / (T*pow(lambda,m)) - alpha * lambda * gammaDot;
 
 						//////////////////////////////////////////////////////////////////////////
 						//collision Factorized Central moment Kernel Geier 2015
 						//////////////////////////////////////////////////////////////////////////               
-						LBMReal Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
+						real Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 							mfacb + mfacc + mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 							mfbcb + mfbcc + mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb + mfccc;
 						
 						Mom000 += dlambda*c1o2;  //1
 
 																												   //(100)//
-						LBMReal Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
+						real Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
 							mfacb*(-1 - ux) + mfacc*(-1 - ux) + mfcaa*(1 - ux) + mfcab*(1 - ux) + mfcac*(1 - ux) + mfcba*(1 - ux) + mfcbb*(1 - ux) +
 							mfcbc*(1 - ux) + mfcca*(1 - ux) + mfccb*(1 - ux) + mfccc*(1 - ux) - mfbaa*ux - mfbab*ux - mfbac*ux - mfbba*ux - mfbbb*ux -
 							mfbbc*ux - mfbca*ux - mfbcb*ux - mfbcc*ux;
 
-						LBMReal Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
+						real Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
 							mfcab*(-1 - uy) + mfcac*(-1 - uy) + mfaca*(1 - uy) + mfacb*(1 - uy) + mfacc*(1 - uy) + mfbca*(1 - uy) + mfbcb*(1 - uy) +
 							mfbcc*(1 - uy) + mfcca*(1 - uy) + mfccb*(1 - uy) + mfccc*(1 - uy) - mfaba*uy - mfabb*uy - mfabc*uy - mfbba*uy - mfbbb*uy -
 							mfbbc*uy - mfcba*uy - mfcbb*uy - mfcbc*uy;
 
-						LBMReal Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
+						real Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
 							mfcba*(-1 - uz) + mfcca*(-1 - uz) + mfaac*(1 - uz) + mfabc*(1 - uz) + mfacc*(1 - uz) + mfbac*(1 - uz) + mfbbc*(1 - uz) +
 							mfbcc*(1 - uz) + mfcac*(1 - uz) + mfcbc*(1 - uz) + mfccc*(1 - uz) - mfaab*uz - mfabb*uz - mfacb*uz - mfbab*uz - mfbbb*uz -
 							mfbcb*uz - mfcab*uz - mfcbb*uz - mfccb*uz;
 						////
 
 						//(110)//
-						LBMReal Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
+						real Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
 							mfcab*(1 - ux)*(-1 - uy) + mfcac*(1 - ux)*(-1 - uy) - mfbaa*ux*(-1 - uy) - mfbab*ux*(-1 - uy) - mfbac*ux*(-1 - uy) +
 							mfaca*(-1 - ux)*(1 - uy) + mfacb*(-1 - ux)*(1 - uy) + mfacc*(-1 - ux)*(1 - uy) + mfcca*(1 - ux)*(1 - uy) + mfccb*(1 - ux)*(1 - uy) +
 							mfccc*(1 - ux)*(1 - uy) - mfbca*ux*(1 - uy) - mfbcb*ux*(1 - uy) - mfbcc*ux*(1 - uy) - mfaba*(-1 - ux)*uy - mfabb*(-1 - ux)*uy -
 							mfabc*(-1 - ux)*uy - mfcba*(1 - ux)*uy - mfcbb*(1 - ux)*uy - mfcbc*(1 - ux)*uy + mfbba*ux*uy + mfbbb*ux*uy + mfbbc*ux*uy;
 
-						LBMReal Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
+						real Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
 							mfcba*(1 - ux)*(-1 - uz) + mfcca*(1 - ux)*(-1 - uz) - mfbaa*ux*(-1 - uz) - mfbba*ux*(-1 - uz) - mfbca*ux*(-1 - uz) +
 							mfaac*(-1 - ux)*(1 - uz) + mfabc*(-1 - ux)*(1 - uz) + mfacc*(-1 - ux)*(1 - uz) + mfcac*(1 - ux)*(1 - uz) + mfcbc*(1 - ux)*(1 - uz) +
 							mfccc*(1 - ux)*(1 - uz) - mfbac*ux*(1 - uz) - mfbbc*ux*(1 - uz) - mfbcc*ux*(1 - uz) - mfaab*(-1 - ux)*uz - mfabb*(-1 - ux)*uz -
 							mfacb*(-1 - ux)*uz - mfcab*(1 - ux)*uz - mfcbb*(1 - ux)*uz - mfccb*(1 - ux)*uz + mfbab*ux*uz + mfbbb*ux*uz + mfbcb*ux*uz;
 
-						LBMReal Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
+						real Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
 							mfbca*(1 - uy)*(-1 - uz) + mfcca*(1 - uy)*(-1 - uz) - mfaba*uy*(-1 - uz) - mfbba*uy*(-1 - uz) - mfcba*uy*(-1 - uz) +
 							mfaac*(-1 - uy)*(1 - uz) + mfbac*(-1 - uy)*(1 - uz) + mfcac*(-1 - uy)*(1 - uz) + mfacc*(1 - uy)*(1 - uz) + mfbcc*(1 - uy)*(1 - uz) +
 							mfccc*(1 - uy)*(1 - uz) - mfabc*uy*(1 - uz) - mfbbc*uy*(1 - uz) - mfcbc*uy*(1 - uz) - mfaab*(-1 - uy)*uz - mfbab*(-1 - uy)*uz -
 							mfcab*(-1 - uy)*uz - mfacb*(1 - uy)*uz - mfbcb*(1 - uy)*uz - mfccb*(1 - uy)*uz + mfabb*uy*uz + mfbbb*uy*uz + mfcbb*uy*uz;
 						////
 
-						LBMReal Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
+						real Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
 							mfaca*(-1 - ux)*(1 - uy)*(-1 - uz) + mfcca*(1 - ux)*(1 - uy)*(-1 - uz) - mfbca*ux*(1 - uy)*(-1 - uz) -
 							mfaba*(-1 - ux)*uy*(-1 - uz) - mfcba*(1 - ux)*uy*(-1 - uz) + mfbba*ux*uy*(-1 - uz) + mfaac*(-1 - ux)*(-1 - uy)*(1 - uz) +
 							mfcac*(1 - ux)*(-1 - uy)*(1 - uz) - mfbac*ux*(-1 - uy)*(1 - uz) + mfacc*(-1 - ux)*(1 - uy)*(1 - uz) +
@@ -1074,14 +1075,14 @@ void ThixotropyLBMKernel::calculate(int step)
 							mfbbb*ux*uy*uz;
 
 						//(200)//
-						LBMReal Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
+						real Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
 							mfccc)*pow(-1 + ux, 2) +
 							(mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 								mfbcb + mfbcc)*pow(ux, 2) +
 								(mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 									mfacb + mfacc)*pow(1 + ux, 2)) - Mom000 / 3;
 
-						LBMReal Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
 							(mfcca + mfccb + mfccc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
@@ -1091,7 +1092,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							(mfbaa + mfbab + mfbac)*pow(1 + uy, 2) +
 							(mfcaa + mfcab + mfcac)*pow(1 + uy, 2)) - Mom000 / 3;
 
-						LBMReal Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
+						real Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
 							mfcba + mfcbc + mfcca + mfccc + mfaac*pow(-1 + uz, 2) +
 							mfbac*pow(-1 + uz, 2) + mfcac*pow(-1 + uz, 2) +
 							2 * mfaba*uz - 2 * mfabc*uz + 2 * mfaca*uz - 2 * mfacc*uz +
@@ -1109,7 +1110,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(210)//
-						LBMReal Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
+						real Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
 							(mfaba + mfabb + mfabc)*uy -
 							(mfaaa + mfaab + mfaac)*(1 + uy)) +
 							pow(ux, 2)*(-((mfbca + mfbcb + mfbcc)*(-1 + uy)) -
@@ -1119,7 +1120,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*uy -
 								(mfcaa + mfcab + mfcac)*(1 + uy))) - Mom010 / 3;
 
-						LBMReal Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
+						real Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
 							mfaac*(-1 + uz) + mfaab*uz + mfaba*uz + mfabb*uz +
 							mfabc*uz + mfaca*uz + mfacb*uz + mfacc*uz +
 							mfaaa*(1 + uz))) -
@@ -1130,7 +1131,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								mfcac*(-1 + uz) + mfcab*uz + mfcba*uz + mfcbb*uz +
 								mfcbc*uz + mfcca*uz + mfccb*uz + mfccc*uz + mfcaa*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) -
 							ux*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1141,7 +1142,7 @@ void ThixotropyLBMKernel::calculate(int step)
 									(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom100 / 3;
 
 
-						LBMReal Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1162,7 +1163,7 @@ void ThixotropyLBMKernel::calculate(int step)
 									mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 									mfcaa*pow(1 + uz, 2))) - Mom100 / 3;
 
-						LBMReal Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfacc*(-1 + uz) + mfacb*uz +
@@ -1175,7 +1176,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							pow(uy, 2)*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1197,7 +1198,7 @@ void ThixotropyLBMKernel::calculate(int step)
 
 
 						//(220)//
-						LBMReal Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) +
 							pow(ux, 2)*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1207,7 +1208,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*pow(uy, 2) +
 								(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom000 / 9;
 
-						LBMReal Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1229,7 +1230,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 								mfcaa*pow(1 + uz, 2))) - Mom000 / 9;
 
-						LBMReal Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) +
@@ -1250,7 +1251,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(221)//
-						LBMReal Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
+						real Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1269,7 +1270,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 									mfcca*(1 + uz)))) - Mom001 / 9;
 
-						LBMReal Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
+						real Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1291,7 +1292,7 @@ void ThixotropyLBMKernel::calculate(int step)
 									(-1 + uy)*(mfccc*pow(-1 + uz, 2) + mfccb*pow(uz, 2) +
 										mfcca*pow(1 + uz, 2)))) - Mom010 / 9;
 
-						LBMReal Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
+						real Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1314,7 +1315,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(211)//
-						LBMReal Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz)) +
 							uy*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) +
 							(-1 + uy)*(mfacc*(-1 + uz) + mfacb*uz + mfaca*(1 + uz))) +
@@ -1327,7 +1328,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								uy*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) +
 								(-1 + uy)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz)))) - Mom011 / 3;
 
-						LBMReal Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
+						real Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1346,7 +1347,7 @@ void ThixotropyLBMKernel::calculate(int step)
 										pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 											mfcca*(1 + uz)))) - Mom101 / 3;
 
-						LBMReal Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
+						real Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
 							mfaab*pow(uz, 2) + mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1367,7 +1368,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(222)//
-						LBMReal Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
+						real Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1394,7 +1395,7 @@ void ThixotropyLBMKernel::calculate(int step)
 
 
 
-						LBMReal Meq000 = drho+dlambda*c1o2;
+						real Meq000 = drho+dlambda*c1o2;
 
 
 						// relaxation Central Moment MRT
@@ -1815,46 +1816,46 @@ void ThixotropyLBMKernel::calculate(int step)
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyLBMKernel::getCalculationTime()
+real ThixotropyLBMKernel::getCalculationTime() // reports the total time of the member UbTimer `timer` (getTotalTime), converted to real
 {
 	//return timer.getDuration();
 	return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyLBMKernel::setCollisionFactorF(double collFactor)
+void ThixotropyLBMKernel::setCollisionFactorF(real collFactor) // forwards collFactor to setCollisionFactor() and also stores it in collFactorF
 {
 	setCollisionFactor(collFactor);
 	this->collFactorF = collFactor;
 
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyLBMKernel::setCollisionFactorH(double collFactor)
+void ThixotropyLBMKernel::setCollisionFactorH(real collFactor)
 {
 	this->collFactorH = collFactor;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyLBMKernel::getCollisionFactorF() const
+real ThixotropyLBMKernel::getCollisionFactorF() const
 {
 	return this->collFactorF;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyLBMKernel::getCollisionFactorH() const
+real ThixotropyLBMKernel::getCollisionFactorH() const
 {
 	return this->collFactorH;
 }
-void ThixotropyLBMKernel::setAlpha(double alpha)
+void ThixotropyLBMKernel::setAlpha(real alpha)
 {
 	this->alpha = alpha;
 }
-double ThixotropyLBMKernel::getAlpha() const
+real ThixotropyLBMKernel::getAlpha() const
 {
 	return this->alpha;
 }
-void ThixotropyLBMKernel::setTheta(double theta)
+void ThixotropyLBMKernel::setTheta(real theta)
 {
 	this->theta = theta;
 }
-double ThixotropyLBMKernel::getTheta() const
+real ThixotropyLBMKernel::getTheta() const
 {
 	return this->theta;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h
index c74af1b1eead237c03c12ba612434a286ebfc656..c638105425c20dfa64a221a02004ee1ece8879fd 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h
@@ -22,50 +22,50 @@ public:
 	virtual ~ThixotropyLBMKernel(void);
 	virtual void calculate(int step);
 	virtual SPtr<LBMKernel> clone();
-	double getCalculationTime();
+	real getCalculationTime();
  
-	void setCollisionFactorF(double collFactor);
-   void setCollisionFactorH(double collFactor);
-   double getCollisionFactorF() const;
-   double getCollisionFactorH() const;
+	void setCollisionFactorF(real collFactor);
+   void setCollisionFactorH(real collFactor);
+   real getCollisionFactorF() const;
+   real getCollisionFactorH() const;
 
-	void setAlpha(double alpha);
-	double getAlpha() const;
+	void setAlpha(real alpha);
+	real getAlpha() const;
 
-	void setTheta(double theta);
-	double getTheta() const;
+	void setTheta(real theta);
+	real getTheta() const;
 
 	void swapDistributions();
 
 protected:
 	virtual void initDataSet();
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	Parameter parameter;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
-	LBMReal collFactorF;
-   LBMReal collFactorH;
+	real collFactorF;
+   real collFactorH;
 
-	LBMReal theta;
-	LBMReal alpha;
+	real theta;
+	real alpha;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp
index d9ce56aa8c4ca5f7a0e2318a9d48120b66f06705..83d6a791e761190e3cdd34d101c4fd1d1588a191 100644
--- a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp
@@ -31,4 +31,4 @@ SPtr<LBMKernel> VoidLBMKernel::clone()
 //////////////////////////////////////////////////////////////////////////
 void VoidLBMKernel::calculate(int step) {}
 //////////////////////////////////////////////////////////////////////////
-double VoidLBMKernel::getCalculationTime() { return 0.0; }
+real VoidLBMKernel::getCalculationTime() { return 0.0; } // calculate() is empty in this kernel, so the reported time is always zero
diff --git a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h
index b9b4b5d2d2c53f91871c3770a3acda0401842efe..0984cab144021c3895bf8cb85f50efbc94476e6b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h
@@ -10,7 +10,7 @@ public:
     ~VoidLBMKernel() override;
     SPtr<LBMKernel> clone() override;
     void calculate(int step) override;
-    double getCalculationTime() override;
+    real getCalculationTime() override;
     void initDataSet();
 
 protected:
diff --git a/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h b/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
index 74627f6181cd02002e2bc2c7a2d284ff288f3c59..bbc4face9f784d120c80dcfbbbd73d07951fe49b 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
@@ -1,14 +1,16 @@
 #ifndef _MPI_STRUCTURES_H_
 #define _MPI_STRUCTURES_H_
 
+#include "lbm/constants/D3Q27.h"
+
 namespace MPIIODataStructures
 {
 //! \struct GridParam
 //! \brief Structure describes parameters of the grid
 //! \details The structure is nessasary to restore the grid correctly
 struct GridParam {
-    double trafoParams[33];
-    double deltaX;
+    real trafoParams[33];
+    real deltaX;
     int blockNx1;
     int blockNx2;
     int blockNx3;
@@ -57,11 +59,11 @@ struct dataSetParam {
 //! \brief Structure describes parameters of the dataSet in MPIIORestartCoProcessor format
 //! \details The structure is used when reading from the file
 struct DataSetRestart {
-    double collFactor;
-    double deltaT;
-    double collFactorL; // for Multiphase model
-    double collFactorG; // for Multiphase model
-    double densityRatio;// for Multiphase model
+    real collFactor;
+    real deltaT;
+    real collFactorL; // for Multiphase model
+    real collFactorG; // for Multiphase model
+    real densityRatio;// for Multiphase model
     int x1;
     int x2;
     int x3;
@@ -75,11 +77,11 @@ struct DataSetRestart {
 //! \brief Structure describes parameters of the dataSet in MPIIOMigrationCoProcessor format
 //! \details The structure is used to find the needed block in the grid when restoring a dataSet
 struct DataSetMigration {
-    double collFactor;
-    double deltaT;
-    double collFactorL; // for Multiphase model
-    double collFactorG; // for Multiphase model
-    double densityRatio;// for Multiphase model
+    real collFactor;
+    real deltaT;
+    real collFactorL; // for Multiphase model
+    real collFactorG; // for Multiphase model
+    real densityRatio;// for Multiphase model
     int globalID;
     int ghostLayerWidth;
     bool compressible;
@@ -113,14 +115,14 @@ struct BoundaryCondition {
     long long densityBoundaryFlags;
     long long wallModelBoundaryFlags;
 
-    float bcVelocityX1;
-    float bcVelocityX2;
-    float bcVelocityX3;
-    float bcDensity;
-    float bcPhaseField;
+    real bcVelocityX1;
+    real bcVelocityX2;
+    real bcVelocityX3;
+    real bcDensity;
+    real bcPhaseField;
 
-    float nx1, nx2, nx3;
-    float q[26];
+    real nx1, nx2, nx3;
+    real q[26];
 
     char algorithmType;
 };
diff --git a/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h b/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h
index ee9e56af9f8578b5ea406a270de1cc1c9986f11e..0f80e380c2d9cfc4d8595ba5284bcbec9276b846 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h
@@ -27,9 +27,9 @@ public:
         if (p == 1)
             return { 1, 1, 1 };
 
-        double a = pow(p * pow(x, 3.0) / xyz, 1.0 / 3.0);
-        double b = pow(p * pow(y, 3.0) / xyz, 1.0 / 3.0);
-        double c = pow(p * pow(z, 3.0) / xyz, 1.0 / 3.0);
+        real a = pow(p * pow(x, 3.0) / xyz, 1.0 / 3.0);
+        real b = pow(p * pow(y, 3.0) / xyz, 1.0 / 3.0);
+        real c = pow(p * pow(z, 3.0) / xyz, 1.0 / 3.0);
 
         MaxDim maxDim;
 
diff --git a/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h b/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h
index 402e6b9603ff38a8236579f132f74fbf0a43c9e1..b599786b76ebdf0187572abec687cfff439120c7 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h
@@ -46,12 +46,12 @@ public:
 protected:
     static int get_number_of_vertices(void *data, int *ierr);
     static void get_vertex_list(void *data, int sizeGID, int sizeLID, ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID,
-                                int wgt_dim, float *obj_wgts, int *ierr);
+                                int wgt_dim, float *obj_wgts, int *ierr); // must stay float*: the signature is dictated by Zoltan's ZOLTAN_OBJ_LIST_FN, not by real
     static void get_num_edges_list(void *data, int sizeGID, int sizeLID, int num_obj, ZOLTAN_ID_PTR globalID,
                                    ZOLTAN_ID_PTR localID, int *numEdges, int *ierr);
     static void get_edge_list(void *data, int sizeGID, int sizeLID, int num_obj, ZOLTAN_ID_PTR globalID,
                               ZOLTAN_ID_PTR localID, int *num_edges, ZOLTAN_ID_PTR nborGID, int *nborProc, int wgt_dim,
-                              float *ewgts, int *ierr);
+                              float *ewgts, int *ierr); // must stay float*: edge weights are float in Zoltan's edge-list callback type
 
 private:
     MPI_Comm comm;
diff --git a/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp b/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp
index db7c73c93b680161aa8819905d1237725a2f8f60..3fbd3643d71409fe21aa800473310399757f3a44 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp
@@ -27,10 +27,10 @@ namespace Utilities
                   if (bc->hasNoSlipBoundaryFlag(fdir))
                   {
                      const int invDir = D3Q27System::INVDIR[fdir];
-                     float q = (float) bc->getQ(invDir);
+                     real q = (real) bc->getQ(invDir);
                      //double r = (double)UbRandom::rand(-50, 50);
-                     float r = (float)UbRandom::rand(-10, 10);
-                     float q_temp = q + q/r;
+                     real r = (real)UbRandom::rand(-10, 10);
+                     real q_temp = q + q/r;
                      if (q_temp < 0.0)
                      {
                         q_temp = 0.0001f;
diff --git a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
index 6e7968f37493476ac7f076b4d7aa129b56c7326f..53282294203213fe98b8867dfaf2fde523490bc5 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
@@ -134,8 +134,8 @@ void CheckpointConverter::convert(int step, int procCount)
 void CheckpointConverter::convertBlocks(int step, int procCount)
 {
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float-width real would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed-time difference keeps full resolution
     start = MPI_Wtime();
 
     // file to read from
@@ -298,8 +298,8 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
         throw UbException(UB_EXARGS, "couldn't open file " + filenameW);
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float-width real would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed-time difference keeps full resolution
     start = MPI_Wtime();
 
     int blocksCount = 0;
@@ -307,7 +307,7 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
     DataSetRestart *dataSetReadArray;
     DataSetMigration *dataSetWriteArray;
     size_t doubleCountInBlock;
-    std::vector<double> doubleValuesArray;
+    std::vector<real> doubleValuesArray;
     size_t sizeofOneDataSet;
 
     // calculate the read offset
@@ -341,7 +341,7 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
         // offset to read the data of the next process
         read_offset =
             read_offset + (MPI_Offset)(3 * sizeof(dataSetParam) +
-                                       blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double)));
+                                       blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(real)));
 
         // write parameters of data arrays
         MPI_File_write_at(file_handlerW, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -350,7 +350,7 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
         MPI_File_write_at(file_handlerW, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType,
                           MPI_STATUS_IGNORE);
 
-        sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(real);
 
         // write blocks and their data arrays
         for (int nb = 0; nb < blocksCount; nb++) {
@@ -434,8 +434,8 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
 void CheckpointConverter::convert___Array(int /*step*/, int procCount, std::string filenameR, std::string filenameW)
 {
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float-width real would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed-time difference keeps full resolution
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -456,7 +456,7 @@ void CheckpointConverter::convert___Array(int /*step*/, int procCount, std::stri
     DataSetSmallRestart *dataSetSmallReadArray;
     DataSetSmallMigration *dataSetSmallWriteArray;
     int doubleCountInBlock;
-    std::vector<double> doubleValuesArray;
+    std::vector<double> doubleValuesArray; // filled by a read typed MPI_DOUBLE below, so the element type must be double regardless of real
 
     // calculate the read offset
     MPI_Offset read_offset = (MPI_Offset)(procCount * sizeof(int));
@@ -482,9 +482,9 @@ void CheckpointConverter::convert___Array(int /*step*/, int procCount, std::stri
                 &doubleValuesArray[0], blocksCount * doubleCountInBlock, MPI_DOUBLE, MPI_STATUS_IGNORE);
 
         read_offset = read_offset + sizeof(dataSetParam) +
-                      blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+                      blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double)); // values are read as MPI_DOUBLE, so the stride is sizeof(double)
 
-        sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double); // NOTE(review): assumes the values are written as MPI_DOUBLE like they are read - confirm against the write call
 
         MPI_File_write_at(file_handlerW, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
@@ -529,8 +529,8 @@ void CheckpointConverter::convertBC(int step, int procCount)
     if (rcW != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filenameW);
 
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float-width real would truncate the timestamp
+    double finish {0.}; // same type as start so the elapsed-time difference keeps full resolution
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp b/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp
index 5c6fe4e8e2b4a02e733d777db9c045bc57f5b6eb..7c5ffe1f3fbde3fa59756507721ccc04d2bbe365 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp
@@ -22,10 +22,10 @@ namespace Utilities
          return false;
    }
 
-   //convert from double to int
-   static int cint(double x)
+   //convert from real to int
+   static int cint(real x)
    {
-      double intpart;
+      double intpart; // modf() stores the integral part through a double*; x is promoted, so this compiles for either width of real
       if (modf(x,&intpart)>=.5)
          return static_cast<int> (floor(x)+1);
       else
@@ -34,7 +34,7 @@ namespace Utilities
 
    //create new mu parser for duct parabolic profile
    //inflow in X
-   static mu::Parser getDuctParaboloidX(double Cy, double Hy, double Cz, double Hz, double V)
+   static mu::Parser getDuctParaboloidX(real Cy, real Hy, real Cz, real Hz, real V)
    {
       mu::Parser fct;
       fct.SetExpr("V*(((-(x2-Cy)^2.0+(Hy/2.0)^2.0)/(Hy/2.0)^2.0)*((-(x3-Cz)^2.0+(Hz/2.0)^2.0)/(Hz/2.0)^2.0))" );
@@ -46,7 +46,7 @@ namespace Utilities
       return fct;
    }
    //inflow in Y
-   static mu::Parser getDuctParaboloidY(double Cx, double Hx, double Cz, double Hz, double V)
+   static mu::Parser getDuctParaboloidY(real Cx, real Hx, real Cz, real Hz, real V)
    {
       mu::Parser fct;
       fct.SetExpr("V*(((-(x1-Cx)^2.0+(Hx/2.0)^2.0)/(Hx/2.0)^2.0)*((-(x3-Cz)^2.0+(Hz/2.0)^2.0)/(Hz/2.0)^2.0))" );
@@ -58,7 +58,7 @@ namespace Utilities
       return fct;
    }
    //inflow in Z
-   static mu::Parser getDuctParaboloidZ(double Cx, double Hx, double Cy, double Hy, double V)
+   static mu::Parser getDuctParaboloidZ(real Cx, real Hx, real Cy, real Hy, real V)
    {
       mu::Parser fct;
       fct.SetExpr("V*(((-(x1-Cx)^2.0+(Hx/2.0)^2.0)/(Hx/2.0)^2.0)*((-(x2-Cy)^2.0+(Hy/2.0)^2.0)/(Hy/2.0)^2.0))" );
@@ -85,11 +85,11 @@ namespace Utilities
       return hash;
    }
    //linear interpolation
-   static double linear_interpolation1D(double x0, double y0, double x1, double y1, double x)
+   static real linear_interpolation1D(real x0, real y0, real x1, real y1, real x)
    {
-      double a = (y1 - y0) / (x1 - x0);
-      double b = -a*x0 + y0;
-      double y = a * x + b;
+      real a = (y1 - y0) / (x1 - x0);
+      real b = -a*x0 + y0;
+      real y = a * x + b;
       return y;
    }
 }
diff --git a/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h b/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h
index 4c9f30a902196f8fef5187442f45b94dc64de283..ec5e9b0981d4a02aeca48d8a33d5a52e018aaf87 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h
+++ b/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h
@@ -66,6 +66,7 @@
 #include <string>
 #include <vector>
 #include "Grid3D.h"
+#include "lbm/constants/D3Q27.h"
 
 //////////////////////////////////////////////////////////////////////////
 // MemoryUtil
@@ -179,8 +180,8 @@ static std::string toString(SPtr<Grid3D> grid, int numberOfProcesses)
     unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (val<1>(blockNx) + ghostLayer) *
                                                              (val<2>(blockNx) + ghostLayer) *
                                                              (val<3>(blockNx) + ghostLayer);
-    double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(double)+sizeof(int)+sizeof(float)*4));
-    double needMem = needMemAll / double(numberOfProcesses);
+    real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(real)+sizeof(int)+sizeof(real)*4));
+    real needMem = needMemAll / real(numberOfProcesses);
     
     std::ostringstream out;
     out << "Grid information:" << std::endl;
@@ -197,7 +198,7 @@ static std::string toString(SPtr<Grid3D> grid, int numberOfProcesses)
     }
     out << "# Necessary memory  = " << needMemAll << " bytes" << std::endl;
     out << "# Necessary memory per process = " << needMem << " bytes" << std::endl;
-    out << "# Available memory per process = " << (double)getTotalPhysMem() << " bytes" << std::endl;
+    out << "# Available memory per process = " << (real)getTotalPhysMem() << " bytes" << std::endl;
     out << "###################################################" << std::endl;
 
     return out.str();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
index f5c87b9fc695d81ad492f89113f2d9e5c56fa9a7..fbfbd1bcab135056fa6b62e31d50b63c898bb83a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
@@ -76,7 +76,7 @@ void BoundaryConditionsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> bloc
         SPtr<BCArray3D> bcArray = bcProcessor->getBCArray();
 
         bool compressible = kernel->getCompressible();
-        double collFactor = kernel->getCollisionFactor();
+        real collFactor = kernel->getCollisionFactor();
 
         int minX1 = 0;
         int minX2 = 0;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp
index e26b59729594fc3175e523e25d23ce7adc56d74e..c541465183dd084135d60b7112182daae33e22ab 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp
@@ -7,7 +7,7 @@
 #include "D3Q27System.h"
 #include "LBMKernel.h"
 
-ChangeBoundaryDensityBlockVisitor::ChangeBoundaryDensityBlockVisitor(float oldBoundaryDensity, float newBoundaryDensity)
+ChangeBoundaryDensityBlockVisitor::ChangeBoundaryDensityBlockVisitor(real oldBoundaryDensity, real newBoundaryDensity)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), oldBoundaryDensity(oldBoundaryDensity),
       newBoundaryDensity(newBoundaryDensity)
 {
@@ -35,7 +35,7 @@ void ChangeBoundaryDensityBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> b
                         bcPtr = bcArray->getBC(x1, x2, x3);
                         if (bcPtr) {
                             if (bcPtr->hasDensityBoundary()) {
-                                float bcDensity = (float)bcPtr->getBoundaryDensity();
+                                real bcDensity = (real)bcPtr->getBoundaryDensity();
                                 if (bcDensity == oldBoundaryDensity) {
                                     bcPtr->setBoundaryDensity(newBoundaryDensity);
                                 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h
index 256448a0602bb6e5ab45fc4116aac35073795ddb..64592f7bb2f81e1df1b22cdc6bcbb6bbb7528dfc 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h
@@ -4,6 +4,7 @@
 #include <PointerDefinitions.h>
 
 #include "Block3DVisitor.h"
+#include "lbm/constants/D3Q27.h"
 
 class Block3D;
 class Grid3D;
@@ -12,14 +13,14 @@ class BoundaryConditions;
 class ChangeBoundaryDensityBlockVisitor : public Block3DVisitor
 {
 public:
-    ChangeBoundaryDensityBlockVisitor(float oldBoundaryDensity, float newBoundaryDensity);
+    ChangeBoundaryDensityBlockVisitor(real oldBoundaryDensity, real newBoundaryDensity);
     ~ChangeBoundaryDensityBlockVisitor() override;
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
 private:
-    float oldBoundaryDensity;
-    float newBoundaryDensity;
+    real oldBoundaryDensity;
+    real newBoundaryDensity;
     SPtr<BoundaryConditions> bcPtr;
 };
 #endif // ChangeBoundaryDensityBlockVisitor_h__
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
index 7602438a23f16295f8d518f70d5a036dac4515ec..b931cbbbda004f7b2057943222d4523c5fb0916b 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
@@ -87,22 +87,22 @@ void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Blo
         string sendPoolKey    = generatePoolKey(srcRank, srcLevel, tgtRank, tgtLevel);
         string receivePoolKey = generatePoolKey(tgtRank, tgtLevel, srcRank, srcLevel);
 
-        TbCbVectorMpiPool<LBMReal>::MpiPoolPtr sendPool = TbCbVectorMpiPool<LBMReal>::getTbCbVectorMpiPool(sendPoolKey);
-        TbCbVectorMpiPool<LBMReal>::MpiPoolPtr recvPool =
-            TbCbVectorMpiPool<LBMReal>::getTbCbVectorMpiPool(receivePoolKey);
+        TbCbVectorMpiPool<real>::MpiPoolPtr sendPool = TbCbVectorMpiPool<real>::getTbCbVectorMpiPool(sendPoolKey);
+        TbCbVectorMpiPool<real>::MpiPoolPtr recvPool =
+            TbCbVectorMpiPool<real>::getTbCbVectorMpiPool(receivePoolKey);
 
         MPI_Comm mpi_comm = *((MPI_Comm *)comm->getNativeCommunicator());
 
         if (!sendPool)
-            sendPool = TbCbVectorMpiPool<LBMReal>::createTbCbVectorMpiPool(
+            sendPool = TbCbVectorMpiPool<real>::createTbCbVectorMpiPool(
                 sendPoolKey, tgtRank, generateMPITag(srcLevel, tgtLevel), mpi_comm);
         if (!recvPool)
-            recvPool = TbCbVectorMpiPool<LBMReal>::createTbCbVectorMpiPool(
+            recvPool = TbCbVectorMpiPool<real>::createTbCbVectorMpiPool(
                 receivePoolKey, tgtRank, generateMPITag(tgtLevel, srcLevel), mpi_comm);
 
-        TbCbVectorMpiPool<LBMReal>::CbVectorKey keyOfSendCbVectorKey =
+        TbCbVectorMpiPool<real>::CbVectorKey keyOfSendCbVectorKey =
             generateVectorKey(sblock->getX1(), sblock->getX2(), sblock->getX3() /*tgtID*/, dir, ib);
-        TbCbVectorMpiPool<LBMReal>::CbVectorKey keyOfRecvCbVectorKey =
+        TbCbVectorMpiPool<real>::CbVectorKey keyOfRecvCbVectorKey =
             generateVectorKey(tblock->getX1(), tblock->getX2(), tblock->getX3() /*srcID*/, invDir, ib);
 
         ////////////////////////////////////////////////////////
@@ -118,8 +118,8 @@ void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Blo
         ////////////////////////////////////////////////////////
 
         // create sender-/receiver
-        sender   = TransmitterPtr(new TbCbVectorSenderMpiPool<LBMReal>(keyOfSendCbVectorKey, sendPool.get()));
-        receiver = TransmitterPtr(new TbCbVectorReceiverMpiPool<LBMReal>(keyOfRecvCbVectorKey, recvPool.get()));
+        sender   = TransmitterPtr(new TbCbVectorSenderMpiPool<real>(keyOfSendCbVectorKey, sendPool.get()));
+        receiver = TransmitterPtr(new TbCbVectorReceiverMpiPool<real>(keyOfRecvCbVectorKey, recvPool.get()));
     }
 #ifdef VF_FETOL
     if (tType == BOND) {
@@ -129,24 +129,24 @@ void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Blo
         int sendBondPoolKey    = generatePoolKey(srcBondRank, srcLevel, tgtBondRank, tgtLevel);
         int receiveBondPoolKey = generatePoolKey(tgtBondRank, tgtLevel, srcBondRank, srcLevel);
 
-        TbCbVectorBondPool<LBMReal>::BondPoolPtr sendPool =
-            TbCbVectorBondPool<LBMReal>::getTbCbVectorBondPool(sendBondPoolKey);
-        TbCbVectorBondPool<LBMReal>::BondPoolPtr recvPool =
-            TbCbVectorBondPool<LBMReal>::getTbCbVectorBondPool(receiveBondPoolKey);
+        TbCbVectorBondPool<real>::BondPoolPtr sendPool =
+            TbCbVectorBondPool<real>::getTbCbVectorBondPool(sendBondPoolKey);
+        TbCbVectorBondPool<real>::BondPoolPtr recvPool =
+            TbCbVectorBondPool<real>::getTbCbVectorBondPool(receiveBondPoolKey);
 
         if (!sendPool)
-            sendPool = TbCbVectorBondPool<LBMReal>::createTbCbVectorBondPool(sendBondPoolKey, tgtBondRank,
+            sendPool = TbCbVectorBondPool<real>::createTbCbVectorBondPool(sendBondPoolKey, tgtBondRank,
                                                                              generateMPITag(srcLevel, tgtLevel));
         if (!recvPool)
-            recvPool = TbCbVectorBondPool<LBMReal>::createTbCbVectorBondPool(receiveBondPoolKey, tgtBondRank,
+            recvPool = TbCbVectorBondPool<real>::createTbCbVectorBondPool(receiveBondPoolKey, tgtBondRank,
                                                                              generateMPITag(tgtLevel, srcLevel));
 
-        TbCbVectorBondPool<LBMReal>::CbVectorKey keyOfSendCbVectorKey = generateVectorKey(tgtID, dir, ib);
-        TbCbVectorBondPool<LBMReal>::CbVectorKey keyOfRecvCbVectorKey = generateVectorKey(srcID, invDir, ib);
+        TbCbVectorBondPool<real>::CbVectorKey keyOfSendCbVectorKey = generateVectorKey(tgtID, dir, ib);
+        TbCbVectorBondPool<real>::CbVectorKey keyOfRecvCbVectorKey = generateVectorKey(srcID, invDir, ib);
 
         // create sender-/receiver
-        sender   = TransmitterPtr(new TbCbVectorSenderBondPool<LBMReal>(keyOfSendCbVectorKey, sendPool.get()));
-        receiver = TransmitterPtr(new TbCbVectorReceiverBondPool<LBMReal>(keyOfRecvCbVectorKey, recvPool.get()));
+        sender   = TransmitterPtr(new TbCbVectorSenderBondPool<real>(keyOfSendCbVectorKey, sendPool.get()));
+        receiver = TransmitterPtr(new TbCbVectorReceiverBondPool<real>(keyOfRecvCbVectorKey, recvPool.get()));
     }
 #endif
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
index d51f6352a251fe360aaf2a8365c77315e099d4d2..af60de0a2e2b9e06488df3011584b8448594bf85 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
@@ -55,7 +55,7 @@ public:
     enum TransmitterType { MPI, BOND, MPI2BOND };
 
 public:
-    using DataType       = CbVector<LBMReal>;
+    using DataType       = CbVector<real>;
     using TransmitterPtr = SPtr<TbTransmitter<DataType>>;
 
 public:
diff --git a/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
index 29ea3bfda98c2ce191d1f7c5bc20691049dc2a04..eec58e1643ec3c3f3aac63899f019247f8b0851e 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
@@ -43,17 +43,17 @@ GenBlocksGridVisitor::GenBlocksGridVisitor(SPtr<GbObject3D> boundingBox) : bound
 //////////////////////////////////////////////////////////////////////////
 void GenBlocksGridVisitor::visit(const SPtr<Grid3D> grid)
 {
-    double orgX1 = boundingBox->getX1Minimum();
-    double orgX2 = boundingBox->getX2Minimum();
-    double orgX3 = boundingBox->getX3Minimum();
+    real orgX1 = boundingBox->getX1Minimum();
+    real orgX2 = boundingBox->getX2Minimum();
+    real orgX3 = boundingBox->getX3Minimum();
 
-    double dx = grid->getDeltaX(0);
+    real dx = grid->getDeltaX(0);
 
     UbTupleInt3 blockNX = grid->getBlockNX();
 
-    double blockLentghX1 = (double)val<1>(blockNX) * dx;
-    double blockLentghX2 = (double)val<2>(blockNX) * dx;
-    double blockLentghX3 = (double)val<3>(blockNX) * dx;
+    real blockLentghX1 = (real)val<1>(blockNX) * dx;
+    real blockLentghX2 = (real)val<2>(blockNX) * dx;
+    real blockLentghX3 = (real)val<3>(blockNX) * dx;
 
     SPtr<CoordinateTransformation3D> trafo(
         new CoordinateTransformation3D(orgX1, orgX2, orgX3, blockLentghX1, blockLentghX2, blockLentghX3));
@@ -78,9 +78,9 @@ void GenBlocksGridVisitor::genBlocks(SPtr<Grid3D> grid)
 {
     minInd =
         grid->getBlockIndexes(boundingBox->getX1Minimum(), boundingBox->getX2Minimum(), boundingBox->getX3Minimum());
-    double geoMaxX1           = boundingBox->getX1Maximum();
-    double geoMaxX2           = boundingBox->getX2Maximum();
-    double geoMaxX3           = boundingBox->getX3Maximum();
+    real geoMaxX1           = boundingBox->getX1Maximum();
+    real geoMaxX2           = boundingBox->getX2Maximum();
+    real geoMaxX3           = boundingBox->getX3Maximum();
     maxInd                    = grid->getBlockIndexes(geoMaxX1, geoMaxX2, geoMaxX3);
     UbTupleDouble3 blockCoord = grid->getBlockWorldCoordinates(
         static_cast<int>(val<1>(maxInd)), static_cast<int>(val<2>(maxInd)), static_cast<int>(val<3>(maxInd)), 0);
@@ -91,7 +91,7 @@ void GenBlocksGridVisitor::genBlocks(SPtr<Grid3D> grid)
     // if (geoMaxX3 > val<3>(blockCoord))
     //    val<3>(maxInd) += 1;
 
-    double dx = grid->getDeltaX(0);
+    real dx = grid->getDeltaX(0);
     if (fabs(geoMaxX1 - val<1>(blockCoord)) > dx)
         val<1>(maxInd) += 1;
     if (fabs(geoMaxX2 - val<2>(blockCoord)) > dx)
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
index 0ba49c1a0683d052a07caae46410b5ea8c35aad7..1c4860070a5ca8aefc4850a9b16dd7273c65f231 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
@@ -97,25 +97,25 @@ void InitDistributionsBlockVisitor::setRho(const std::string &muParserString)
     this->checkFunction(muRho);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setVx1(LBMReal vx1)
+void InitDistributionsBlockVisitor::setVx1(real vx1)
 {
     this->muVx1.SetExpr(UbSystem::toString(vx1, D3Q27RealLim::digits10));
     this->checkFunction(muVx1);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setVx2(LBMReal vx2)
+void InitDistributionsBlockVisitor::setVx2(real vx2)
 {
     this->muVx2.SetExpr(UbSystem::toString(vx2, D3Q27RealLim::digits10));
     this->checkFunction(muVx2);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setVx3(LBMReal vx3)
+void InitDistributionsBlockVisitor::setVx3(real vx3)
 {
     this->muVx3.SetExpr(UbSystem::toString(vx3, D3Q27RealLim::digits10));
     this->checkFunction(muVx3);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setRho(LBMReal rho)
+void InitDistributionsBlockVisitor::setRho(real rho)
 {
     this->muRho.SetExpr(UbSystem::toString(rho, D3Q27RealLim::digits10));
     this->checkFunction(muRho);
@@ -124,10 +124,11 @@ void InitDistributionsBlockVisitor::setRho(LBMReal rho)
 void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    if(!block) UB_THROW( UbException(UB_EXARGS,"block is not exist") );
 
-   double dx = grid->getDeltaX(block);
+   real dx = grid->getDeltaX(block);
 
    //define vars for functions
    mu::value_type x1,x2,x3;
@@ -136,11 +137,11 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
    this->muVx3.DefineVar("x1",&x1); this->muVx3.DefineVar("x2",&x2); this->muVx3.DefineVar("x3",&x3);
    this->muRho.DefineVar("x1",&x1); this->muRho.DefineVar("x2",&x2); this->muRho.DefineVar("x3",&x3);
 
-    using CalcFeqsFct = void (*)(LBMReal *const & /*feq[27]*/, const LBMReal & /*(d)rho*/, const LBMReal & /*vx1*/,
-                                 const LBMReal & /*vx2*/, const LBMReal & /*vx3*/);
+    using CalcFeqsFct = void (*)(real *const & /*feq[27]*/, const real & /*(d)rho*/, const real & /*vx1*/,
+                                 const real & /*vx2*/, const real & /*vx3*/);
     CalcFeqsFct calcFeqsFct = NULL;
    
-   LBMReal vx1, vx2, vx3, rho;
+   real vx1, vx2, vx3, rho;
 
    int gridRank = grid->getRank();
    int blockRank = block->getRank();
@@ -159,9 +160,9 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
       SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
       SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();  
 
-      LBMReal o  = kernel->getCollisionFactor();
+      real o  = kernel->getCollisionFactor();
 
-      LBMReal f[D3Q27System::ENDF+1];
+      real f[D3Q27System::ENDF+1];
 
       for(std::size_t ix3=0; ix3<bcArray->getNX3(); ix3++)
          for(std::size_t ix2=0; ix2<bcArray->getNX2(); ix2++)
@@ -178,73 +179,73 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
                rho = muRho.Eval();
 
                //x-derivative
-               double deltaX=dx*0.5;
+               real deltaX=dx*0.5;
                x1 = coords[0]+deltaX;
-               double vx1Plusx1 = muVx1.Eval();
-               double vx2Plusx1 = muVx2.Eval();
-               double vx3Plusx1 = muVx3.Eval();
+               real vx1Plusx1 = muVx1.Eval();
+               real vx2Plusx1 = muVx2.Eval();
+               real vx3Plusx1 = muVx3.Eval();
 
                x1 = coords[0]-deltaX;
-               double vx1Minusx1 = muVx1.Eval();
-               double vx2Minusx1 = muVx2.Eval();
-               double vx3Minusx1 = muVx3.Eval();
+               real vx1Minusx1 = muVx1.Eval();
+               real vx2Minusx1 = muVx2.Eval();
+               real vx3Minusx1 = muVx3.Eval();
 
                //y-derivative
                x1 = coords[0];
                x2 = coords[1]+deltaX;
-               double vx1Plusx2 = muVx1.Eval();
-               double vx2Plusx2 = muVx2.Eval();
-               double vx3Plusx2 = muVx3.Eval();
+               real vx1Plusx2 = muVx1.Eval();
+               real vx2Plusx2 = muVx2.Eval();
+               real vx3Plusx2 = muVx3.Eval();
 
                x2 = coords[1]-deltaX;
-               double vx1Minusx2 = muVx1.Eval();
-               double vx2Minusx2 = muVx2.Eval();
-               double vx3Minusx2 = muVx3.Eval();
+               real vx1Minusx2 = muVx1.Eval();
+               real vx2Minusx2 = muVx2.Eval();
+               real vx3Minusx2 = muVx3.Eval();
 
                //z-derivative
                x2 = coords[1];
                x3 = coords[2]+deltaX;
-               double vx1Plusx3 = muVx1.Eval();
-               double vx2Plusx3 = muVx2.Eval();
-               double vx3Plusx3 = muVx3.Eval();
+               real vx1Plusx3 = muVx1.Eval();
+               real vx2Plusx3 = muVx2.Eval();
+               real vx3Plusx3 = muVx3.Eval();
 
                x3 = coords[2]-deltaX;
-               double vx1Minusx3 = muVx1.Eval();
-               double vx2Minusx3 = muVx2.Eval();
-               double vx3Minusx3 = muVx3.Eval();
+               real vx1Minusx3 = muVx1.Eval();
+               real vx2Minusx3 = muVx2.Eval();
+               real vx3Minusx3 = muVx3.Eval();
 
-               double ax=(vx1Plusx1-vx1Minusx1)/(2.0*deltaX)*dx;
-               double bx=(vx2Plusx1-vx2Minusx1)/(2.0*deltaX)*dx;
-               double cx=(vx3Plusx1-vx3Minusx1)/(2.0*deltaX)*dx;
+               real ax=(vx1Plusx1-vx1Minusx1)/(2.0*deltaX)*dx;
+               real bx=(vx2Plusx1-vx2Minusx1)/(2.0*deltaX)*dx;
+               real cx=(vx3Plusx1-vx3Minusx1)/(2.0*deltaX)*dx;
 
-               double ay=(vx1Plusx2-vx1Minusx2)/(2.0*deltaX)*dx;
-               double by=(vx2Plusx2-vx2Minusx2)/(2.0*deltaX)*dx;
-               double cy=(vx3Plusx2-vx3Minusx2)/(2.0*deltaX)*dx;
+               real ay=(vx1Plusx2-vx1Minusx2)/(2.0*deltaX)*dx;
+               real by=(vx2Plusx2-vx2Minusx2)/(2.0*deltaX)*dx;
+               real cy=(vx3Plusx2-vx3Minusx2)/(2.0*deltaX)*dx;
 
-               double az=(vx1Plusx3-vx1Minusx3)/(2.0*deltaX)*dx;
-               double bz=(vx2Plusx3-vx2Minusx3)/(2.0*deltaX)*dx;
-               double cz=(vx3Plusx3-vx3Minusx3)/(2.0*deltaX)*dx;
-               double eps_new=1.0;
-               LBMReal op = 1.;
+               real az=(vx1Plusx3-vx1Minusx3)/(2.0*deltaX)*dx;
+               real bz=(vx2Plusx3-vx2Minusx3)/(2.0*deltaX)*dx;
+               real cz=(vx3Plusx3-vx3Minusx3)/(2.0*deltaX)*dx;
+               real eps_new=1.0;
+               real op = 1.;
 
-               LBMReal feq[27];
+               real feq[27];
 
                calcFeqsFct(feq,rho,vx1,vx2,vx3);
 
-               double f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
-               double f_N    = f_E + eps_new *((2.*(ax - by))/(9.*o));
-               double f_T    = f_E + eps_new *((2.*(ax - cz))/(9.*o));
-               double f_NE   = eps_new *(-(5.*cz*o + 3.*(ay + bx)*op - 2.*cz*op + ax*(5.*o + op) + by*(5.*o + op))/(54.*o*op));
-               double f_SE   = f_NE + eps_new *((  ay + bx )/(9.*o));
-               double f_TE   = eps_new *(-(5.*cz*o + by*(5.*o - 2.*op) + 3.*(az + cx)*op + cz*op + ax*(5.*o + op))/(54.*o*op));
-               double f_BE   = f_TE + eps_new *((  az + cx )/(9.*o));
-               double f_TN   = eps_new *(-(5.*ax*o + 5.*by*o + 5.*cz*o - 2.*ax*op + by*op + 3.*bz*op + 3.*cy*op + cz*op)/(54.*o*op));
-               double f_BN   = f_TN + eps_new *((  bz + cy )/(9.*o));
-               double f_ZERO = eps_new *((5.*(ax + by + cz))/(9.*op));
-               double f_TNE  = eps_new *(-(ay + az + bx + bz + cx + cy)/(72.*o));
-               double f_TSW  = - eps_new *((ay + bx)/(36.*o)) - f_TNE;
-               double f_TSE  = - eps_new *((az + cx)/(36.*o)) - f_TNE;
-               double f_TNW  = - eps_new *((bz + cy)/(36.*o)) - f_TNE;
+               real f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
+               real f_N    = f_E + eps_new *((2.*(ax - by))/(9.*o));
+               real f_T    = f_E + eps_new *((2.*(ax - cz))/(9.*o));
+               real f_NE   = eps_new *(-(5.*cz*o + 3.*(ay + bx)*op - 2.*cz*op + ax*(5.*o + op) + by*(5.*o + op))/(54.*o*op));
+               real f_SE   = f_NE + eps_new *((  ay + bx )/(9.*o));
+               real f_TE   = eps_new *(-(5.*cz*o + by*(5.*o - 2.*op) + 3.*(az + cx)*op + cz*op + ax*(5.*o + op))/(54.*o*op));
+               real f_BE   = f_TE + eps_new *((  az + cx )/(9.*o));
+               real f_TN   = eps_new *(-(5.*ax*o + 5.*by*o + 5.*cz*o - 2.*ax*op + by*op + 3.*bz*op + 3.*cy*op + cz*op)/(54.*o*op));
+               real f_BN   = f_TN + eps_new *((  bz + cy )/(9.*o));
+               real f_ZERO = eps_new *((5.*(ax + by + cz))/(9.*op));
+               real f_TNE  = eps_new *(-(ay + az + bx + bz + cx + cy)/(72.*o));
+               real f_TSW  = - eps_new *((ay + bx)/(36.*o)) - f_TNE;
+               real f_TSE  = - eps_new *((az + cx)/(36.*o)) - f_TNE;
+               real f_TNW  = - eps_new *((bz + cy)/(36.*o)) - f_TNE;
 
 
                f[DIR_P00]    = f_E    + feq[DIR_P00];
@@ -297,7 +298,8 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
 //////////////////////////////////////////////////////////////////////////
 void InitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 {
-    double x1 = 1.0, x2 = 1.0, x3 = 1.0;
+    // muParser binds variables through mu::value_type*, so use the parser's own type (as visit() does) rather than real
+    mu::value_type x1 = 1.0, x2 = 1.0, x3 = 1.0;
     fct.DefineVar("x1", &x1);
     fct.DefineVar("x2", &x2);
     fct.DefineVar("x3", &x3);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h
index c64f0ed936c9d9f527ec49f31e9646d5fa3150e9..68ba69f8388fe1dffe7ed1acad8d54619f799eb2 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h
@@ -63,7 +63,7 @@ class Block3D;
 class InitDistributionsBlockVisitor : public Block3DVisitor
 {
 public:
-    using D3Q27RealLim = std::numeric_limits<LBMReal>;
+    using D3Q27RealLim = std::numeric_limits<real>;
 
 public:
     InitDistributionsBlockVisitor();
@@ -81,10 +81,10 @@ public:
     void setVx3(const std::string &muParserString);
     void setRho(const std::string &muParserString);
     //////////////////////////////////////////////////////////////////////////
-    void setVx1(LBMReal vx1);
-    void setVx2(LBMReal vx2);
-    void setVx3(LBMReal vx3);
-    void setRho(LBMReal rho);
+    void setVx1(real vx1);
+    void setVx2(real vx2);
+    void setVx3(real vx3);
+    void setRho(real rho);
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp
index 1bcb6057f5b5987ced9adc17e7d6fabd262911e6..2632f2c59db6d4982806c50dcc50f743cc5c2ad3 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp
@@ -10,7 +10,7 @@
 #include "LBMKernel.h"
 #include <basics/utilities/UbFileInputASCII.h>
 
-InitDistributionsFromFileBlockVisitor::InitDistributionsFromFileBlockVisitor(/*LBMReal nu, */ LBMReal rho,
+InitDistributionsFromFileBlockVisitor::InitDistributionsFromFileBlockVisitor(/*real nu, */ real rho,
                                                                              std::string filename)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), /*nu(nu),*/ rho(rho)
 {
@@ -23,7 +23,7 @@ InitDistributionsFromFileBlockVisitor::InitDistributionsFromFileBlockVisitor(/*L
     int nodesX2 = in.readInteger();
     int nodesX3 = in.readInteger();
 
-    matrix = CbArray4D<LBMReal, IndexerX4X3X2X1>(3, nodesX1, nodesX2, nodesX3, 0);
+    matrix = CbArray4D<real, IndexerX4X3X2X1>(3, nodesX1, nodesX2, nodesX3, 0);
 
     for (int x3 = 0; x3 < nodesX3; x3++)
         for (int x2 = 0; x2 < nodesX2; x2++)
@@ -52,11 +52,11 @@ void InitDistributionsFromFileBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<
     //   LBMReal o = LBMSystem::calcCollisionFactor(nu, block->getLevel());
 
     // Funktionszeiger
-    typedef void (*CalcFeqsFct)(LBMReal *const & /*feq[27]*/, const LBMReal & /*(d)rho*/, const LBMReal & /*vx1*/,
-                                const LBMReal & /*vx2*/, const LBMReal & /*vx3*/);
+    typedef void (*CalcFeqsFct)(real *const & /*feq[27]*/, const real & /*(d)rho*/, const real & /*vx1*/,
+                                const real & /*vx2*/, const real & /*vx3*/);
     CalcFeqsFct calcFeqsFct = NULL;
 
-    LBMReal vx1, vx2, vx3;
+    real vx1, vx2, vx3;
 
     int gridRank  = grid->getRank();
     int blockRank = block->getRank();
@@ -76,7 +76,7 @@ void InitDistributionsFromFileBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<
         SPtr<BCArray3D> bcArray        = kernel->getBCProcessor()->getBCArray();
         SPtr<EsoTwist3D> distributions = dynamicPointerCast<EsoTwist3D>(kernel->getDataSet()->getFdistributions());
 
-        LBMReal f[D3Q27System::ENDF + 1];
+        real f[D3Q27System::ENDF + 1];
 
         //      size_t nx1 = distributions->getNX1();
         //      size_t nx2 = distributions->getNX2();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h
index 1f40abadeef750da38e03d3db30ba752d4ae9da0..cc7acc395d8b17358cc567692e46c67738328436 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h
@@ -12,14 +12,14 @@ class Block3D;
 class InitDistributionsFromFileBlockVisitor : public Block3DVisitor
 {
 public:
-    InitDistributionsFromFileBlockVisitor(/*LBMReal nu, */ LBMReal rho, std::string file);
+    InitDistributionsFromFileBlockVisitor(/*real nu, */ real rho, std::string file);
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
 private:
-    CbArray4D<LBMReal, IndexerX4X3X2X1> matrix;
+    CbArray4D<real, IndexerX4X3X2X1> matrix;
     enum Velocity { Vx1, Vx2, Vx3 };
     //   LBMReal nu;
-    LBMReal rho;
+    real rho;
 };
 #endif // InitDistributionsFromFileBlockVisitor_h__
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp
index 567ce2e7ff5b40f3c8042bd404394a3fbf9ffee4..6dd6976ca3cb250e720079031632b9b5e3902696 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp
@@ -16,7 +16,7 @@
 using namespace std;
 
 InitDistributionsWithInterpolationGridVisitor::InitDistributionsWithInterpolationGridVisitor(
-    SPtr<Grid3D> oldGrid, InterpolationProcessorPtr iProcessor, LBMReal nu)
+    SPtr<Grid3D> oldGrid, InterpolationProcessorPtr iProcessor, real nu)
     : oldGrid(oldGrid), iProcessor(iProcessor), nu(nu)
 {
 }
@@ -119,11 +119,11 @@ void InitDistributionsWithInterpolationGridVisitor::copyRemoteBlock(SPtr<Block3D
         SPtr<EsoTwist3D> oldDistributions =
             dynamicPointerCast<EsoTwist3D>(oldKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Send(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -141,11 +141,11 @@ void InitDistributionsWithInterpolationGridVisitor::copyRemoteBlock(SPtr<Block3D
         SPtr<EsoTwist3D> newDistributions =
             dynamicPointerCast<EsoTwist3D>(newKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(newDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(newDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(newDistributions)->getZeroDistributions();
 
         MPI_Recv(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -165,10 +165,10 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateLocalBlockCoarseT
 {
     D3Q27ICell icellC;
     D3Q27ICell icellF;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
 
-    LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-    LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+    real omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+    real omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
     iProcessor->setOmegas(omegaC, omegaF);
 
@@ -265,11 +265,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
         SPtr<EsoTwist3D> oldDistributions =
             dynamicPointerCast<EsoTwist3D>(oldKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Send(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -285,10 +285,10 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
     } else if (newBlockRank == newGridRank && newBlock->isActive()) {
         D3Q27ICell icellC;
         D3Q27ICell icellF;
-        LBMReal xoff, yoff, zoff;
+        real xoff, yoff, zoff;
 
-        LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-        LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+        real omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+        real omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
         iProcessor->setOmegas(omegaC, omegaF);
 
@@ -313,11 +313,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
 
         SPtr<EsoTwist3D> oldDistributions(new D3Q27EsoTwist3DSplittedVector(bMaxX1, bMaxX2, bMaxX3, 0));
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Recv(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -393,12 +393,12 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
 void InitDistributionsWithInterpolationGridVisitor::interpolateLocalBlockFineToCoarse(SPtr<Block3D> oldBlock,
                                                                                       SPtr<Block3D> newBlock)
 {
-    LBMReal icellC[27];
+    real icellC[27];
     D3Q27ICell icellF;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
 
-    LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-    LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+    real omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+    real omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
     iProcessor->setOmegas(omegaC, omegaF);
 
@@ -496,11 +496,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockFineTo
         SPtr<EsoTwist3D> oldDistributions =
             dynamicPointerCast<EsoTwist3D>(oldKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Send(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -514,12 +514,12 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockFineTo
         std::vector<int> &bcDataVector  = bcArrayOldBlock->getBcindexmatrixDataVector();
         MPI_Send(&bcDataVector[0], (int)bcDataVector.size(), MPI_INT, newBlockRank, 0, MPI_COMM_WORLD);
     } else if (newBlockRank == newGridRank && newBlock->isActive()) {
-        LBMReal icellC[27];
+        real icellC[27];
         D3Q27ICell icellF;
-        LBMReal xoff, yoff, zoff;
+        real xoff, yoff, zoff;
 
-        LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-        LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+        real omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+        real omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
         iProcessor->setOmegas(omegaC, omegaF);
 
@@ -544,11 +544,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockFineTo
 
         SPtr<EsoTwist3D> oldDistributions(new D3Q27EsoTwist3DSplittedVector(bMaxX1, bMaxX2, bMaxX3, 0));
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Recv(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h
index 207656e1f3bfb287cf8cc1bd1270daf510ce9aa3..a143ab6b0b40a8b35023a4916ffcbd20ae4b726c 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h
@@ -14,7 +14,7 @@ class InitDistributionsWithInterpolationGridVisitor : public Grid3DVisitor
 {
 public:
     InitDistributionsWithInterpolationGridVisitor(SPtr<Grid3D> oldGrid, SPtr<InterpolationProcessor> iProcessor,
-                                                  LBMReal nu);
+                                                  real nu);
     ~InitDistributionsWithInterpolationGridVisitor() override;
     void visit(SPtr<Grid3D> grid) override;
 
@@ -28,7 +28,7 @@ private:
 
     SPtr<Grid3D> newGrid;
     SPtr<Grid3D> oldGrid;
-    LBMReal nu;
+    real nu;
 
     SPtr<InterpolationProcessor> iProcessor;
 };
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp
index 9c8c05babe4fc2d454908095e8a232eb14434df2..0c666958912c7f73f74d91b179e19cf6d3b06dd1 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp
@@ -209,7 +209,7 @@ void InitThixotropyBlockVisitor::setLambda(const std::string& muParserString)
 //	this->checkFunction(muf3);
 //}
 //////////////////////////////////////////////////////////////////////////
-void InitThixotropyBlockVisitor::setLambda(LBMReal lambda)
+void InitThixotropyBlockVisitor::setLambda(real lambda)
 {
    this->muLambda.SetExpr(UbSystem::toString(lambda, D3Q27RealLim::digits10));
    this->checkFunction(muLambda);
@@ -233,7 +233,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
    this->muLambda.DefineVar("x1",&x1); this->muLambda.DefineVar("x2",&x2); this->muLambda.DefineVar("x3",&x3);
 
    //Funktionszeiger
-   typedef void (*CalcFeqsFct)(LBMReal* const& /*feq[27]*/,const LBMReal& /*(d)rho*/,const LBMReal& /*vx1*/,const LBMReal& /*vx2*/,const LBMReal& /*vx3*/);
+   typedef void (*CalcFeqsFct)(real* const& /*feq[27]*/,const real& /*(d)rho*/,const real& /*vx1*/,const real& /*vx2*/,const real& /*vx3*/);
    CalcFeqsFct   calcFeqsFct   = NULL;
 
    int gridRank = grid->getRank();
@@ -253,7 +253,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
       SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
       SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getHdistributions();  
 
-      LBMReal h[D3Q27System::ENDF+1];
+      real h[D3Q27System::ENDF+1];
 
       for(std::size_t ix3=0; ix3<bcArray->getNX3(); ix3++)
          for(std::size_t ix2=0; ix2<bcArray->getNX2(); ix2++)
@@ -281,7 +281,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
                //distributionsf->setDistribution(f, ix1, ix2, ix3);
                //distributionsf->setDistributionInv(f, ix1, ix2, ix3);
 
-               LBMReal lambda = muLambda.Eval();
+               real lambda = muLambda.Eval();
                
                calcFeqsFct(h,lambda,0.0,0.0,0.0);
                
@@ -303,7 +303,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 //////////////////////////////////////////////////////////////////////////
 void InitThixotropyBlockVisitor::checkFunction(mu::Parser fct)
 {
-   double x1 = 1.0, x2 = 1.0, x3 = 1.0;
+   real x1 = 1.0, x2 = 1.0, x3 = 1.0;
    fct.DefineVar("x1", &x1);
    fct.DefineVar("x2", &x2);
    fct.DefineVar("x3", &x3);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h
index a9105e027c0fed48dce613b2594d199ba7531f22..eb35a9ad7d7718bb0f22ec16c71ebbd7cb646eb0 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h
@@ -51,7 +51,7 @@
 class InitThixotropyBlockVisitor : public Block3DVisitor
 {
 public:
-	typedef std::numeric_limits<LBMReal> D3Q27RealLim;
+	typedef std::numeric_limits<real> D3Q27RealLim;
 
 public:
 	InitThixotropyBlockVisitor();
@@ -98,7 +98,7 @@ public:
 	//void setf1(LBMReal f1);
 	//void setf2(LBMReal f2);
 	//void setf3(LBMReal f3);
-	void setLambda(LBMReal lambda);
+	void setLambda(real lambda);
 	//void setD(LBMReal D);
 
 	//void initialize(double* f, double x1, double x2, double x3, double vx1, double vx2, double vx3, double rho, UbTupleDouble3 coords, double dx, double o, bool NSE);
@@ -107,7 +107,7 @@ public:
 
 protected:
 	void checkFunction(mu::Parser fct);
-	typedef void(*CalcFeqsFct)(LBMReal* const& /*feq[27]*/, const LBMReal& /*(d)rho*/, const LBMReal& /*vx1*/, const LBMReal& /*vx2*/, const LBMReal& /*vx3*/);
+	typedef void(*CalcFeqsFct)(real* const& /*feq[27]*/, const real& /*(d)rho*/, const real& /*vx1*/, const real& /*vx2*/, const real& /*vx3*/);
 
 private:
 	mu::Parser muVx1;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
index 7546aa30721cac1655fba94cb3d68e98d1398546..1e62e0a2c35367fb6189822bcdbf96b611d75bb9 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
@@ -110,6 +110,8 @@ void MetisPartitioningGridVisitor::distributePartitionData(SPtr<Grid3D> grid, Pa
 //////////////////////////////////////////////////////////////////////////
 void MetisPartitioningGridVisitor::buildMetisGraphLevelIntersected(SPtr<Grid3D> grid, int nofSegments, PartLevel level)
 {
+    using namespace vf::lbm::dir;
+
     int edges                       = 0;
     const int edgeWeight            = 1;
     const int edgeWeightChildFactor = 8;
@@ -133,7 +135,7 @@ void MetisPartitioningGridVisitor::buildMetisGraphLevelIntersected(SPtr<Grid3D>
             // the weights of the vertices are 2^level of grid (1, 2, 4, 8 .....) 1<<level
             metis.vwgt.push_back((idx_t)(1 << block->getLevel()));
 
-            for (int dir = D3Q27System::DIR_P00; dir <= numOfDirs; dir++) {
+            for (int dir = (int)DIR_P00; dir <= numOfDirs; dir++) {
                 SPtr<Block3D> neighBlock = grid->getNeighborBlock(dir, block);
                 if (neighBlock) {
                     if (this->getPartitionCondition(neighBlock, level)) {
@@ -169,6 +171,8 @@ void MetisPartitioningGridVisitor::buildMetisGraphLevelIntersected(SPtr<Grid3D>
 //////////////////////////////////////////////////////////////////////////
 void MetisPartitioningGridVisitor::buildMetisGraphLevelBased(SPtr<Grid3D> grid, int nofSegments, PartLevel level)
 {
+    using namespace vf::lbm::dir;
+
     int minInitLevel = grid->getCoarsestInitializedLevel();
     int maxInitLevel = grid->getFinestInitializedLevel();
 
@@ -200,7 +204,7 @@ void MetisPartitioningGridVisitor::buildMetisGraphLevelBased(SPtr<Grid3D> grid,
             metis.xadj.push_back(edges);
             metis.vwgt.push_back(vertexWeight);
 
-            for (int dir = D3Q27System::DIR_P00; dir <= numOfDirs; dir++) {
+            for (int dir = (int)DIR_P00; dir <= numOfDirs; dir++) {
                 SPtr<Block3D> neighBlock = grid->getNeighborBlock(dir, block);
                 if (neighBlock) {
                     if (this->getPartitionCondition(neighBlock, level)) {
@@ -256,11 +260,13 @@ void MetisPartitioningGridVisitor::clear()
 int MetisPartitioningGridVisitor::getEdgeWeight(int dir)
 {
     using namespace D3Q27System;
-    if (dir <= DIR_00M) {
+    using namespace vf::lbm::dir;
+
+    if (dir <= (int)DIR_00M) {
         return 100;
-    } else if (dir >= DIR_PP0 && dir <= DIR_0MP) {
+    } else if (dir >= (int)DIR_PP0 && dir <= (int)DIR_0MP) {
         return 10;
-    } else if (dir >= DIR_PPP) {
+    } else if (dir >= (int)DIR_PPP) {
         return 1;
     }
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp
index 003d5d31204fafc82f78a0fddb04897c2c60e77f..b4eee2dfbd952d27835dbaab24da84c041999a21 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp
@@ -77,12 +77,12 @@ void MultiphaseBoundaryConditionsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Blo
       SPtr<BCArray3D> bcArray = bcProcessor->getBCArray();
 
       bool compressible = kernel->getCompressible();
-      double collFactorL = kernel->getCollisionFactorL();
-	  double collFactorG = kernel->getCollisionFactorG();
-	  double collFactorPh = 1.0/kernel->getPhaseFieldRelaxation();
-	  double densityRatio = kernel->getDensityRatio();
-	  LBMReal phiL = kernel->getPhiL();
-	  LBMReal phiH = kernel->getPhiH();
+      real collFactorL = kernel->getCollisionFactorL();
+	  real collFactorG = kernel->getCollisionFactorG();
+	  real collFactorPh = 1.0/kernel->getPhaseFieldRelaxation();
+	  real densityRatio = kernel->getDensityRatio();
+	  real phiL = kernel->getPhiL();
+	  real phiH = kernel->getPhiH();
       //int level = block->getLevel();
 
       int minX1 = 0;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp
index a35fc289b7505c722151e2a5afe98815131a989d..fc57572c9183f7ba23701d37703fa0c900e1ccc4 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp
@@ -50,7 +50,7 @@ MultiphaseInitDistributionsBlockVisitor::MultiphaseInitDistributionsBlockVisitor
 	this->setRho(0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-MultiphaseInitDistributionsBlockVisitor::MultiphaseInitDistributionsBlockVisitor( LBMReal densityRatio, LBMReal vx1, LBMReal vx2, LBMReal vx3, LBMReal rho)
+MultiphaseInitDistributionsBlockVisitor::MultiphaseInitDistributionsBlockVisitor( real densityRatio, real vx1, real vx2, real vx3, real rho)
 	: Block3DVisitor(0, D3Q27System::MAXLEVEL), densityRatio(densityRatio) 
 {
 	this->setVx1(vx1);
@@ -118,31 +118,31 @@ void MultiphaseInitDistributionsBlockVisitor::setPhi( const std::string& muParse
 	this->checkFunction(muPhi); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setVx1( LBMReal vx1 ) 
+void MultiphaseInitDistributionsBlockVisitor::setVx1( real vx1 ) 
 { 
 	this->muVx1.SetExpr( UbSystem::toString(vx1,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx1); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setVx2( LBMReal vx2 ) 
+void MultiphaseInitDistributionsBlockVisitor::setVx2( real vx2 ) 
 { 
 	this->muVx2.SetExpr( UbSystem::toString(vx2,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx2); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setVx3( LBMReal vx3 ) 
+void MultiphaseInitDistributionsBlockVisitor::setVx3( real vx3 ) 
 { 
 	this->muVx3.SetExpr( UbSystem::toString(vx3,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx3); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setRho( LBMReal rho ) 
+void MultiphaseInitDistributionsBlockVisitor::setRho( real rho ) 
 { 
 	this->muRho.SetExpr( UbSystem::toString(rho,D3Q27RealLim::digits10) );  
 	this->checkFunction(muRho); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setPhi( LBMReal phi ) 
+void MultiphaseInitDistributionsBlockVisitor::setPhi( real phi ) 
 { 
 	this->muPhi.SetExpr( UbSystem::toString(phi,D3Q27RealLim::digits10) );  
 	this->checkFunction(muPhi); 
@@ -151,6 +151,7 @@ void MultiphaseInitDistributionsBlockVisitor::setPhi( LBMReal phi )
 void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D> block) 
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	if(!block) UB_THROW( UbException(UB_EXARGS,"block is not exist") );
 
@@ -162,7 +163,7 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 	this->muRho.DefineVar("x1",&x1); this->muRho.DefineVar("x2",&x2); this->muRho.DefineVar("x3",&x3);
 	this->muPhi.DefineVar("x1",&x1); this->muPhi.DefineVar("x2",&x2); this->muPhi.DefineVar("x3",&x3);
 
-	LBMReal vx1, vx2, vx3, rho, /*p1,*/ phi;
+	real vx1, vx2, vx3, rho, /*p1,*/ phi;
 
 	int gridRank = grid->getRank();
 	int blockRank = block->getRank();
@@ -178,10 +179,10 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 		SPtr<EsoTwist3D> distributionsH = dynamicPointerCast<EsoTwist3D>(kernel->getDataSet()->getHdistributions());
         SPtr<EsoTwist3D> distributionsH2 = dynamicPointerCast<EsoTwist3D>(kernel->getDataSet()->getH2distributions());
 
-		LBMReal phiL = kernel->getPhiL();
-		LBMReal phiH = kernel->getPhiH();
+		real phiL = kernel->getPhiL();
+		real phiH = kernel->getPhiH();
 
-		LBMReal f[D3Q27System::ENDF+1];
+		real f[D3Q27System::ENDF+1];
 
 		for(int ix3=0; ix3<(int)bcArray->getNX3(); ix3++)
             for (int ix2 = 0; ix2 < (int)bcArray->getNX2(); ix2++)
@@ -201,29 +202,29 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 					phi = muPhi.Eval();
 					
 					//rho = phi*1.0 + (1.0-phi)/densityRatio;
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0/densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0/densityRatio;
 					rho = rhoH + (rhoH - rhoL)*(phi - phiH)/(phiH - phiL);
 
 			
-					LBMReal feq[27];
-					LBMReal geq[27];
+					real feq[27];
+					real geq[27];
 
 					//calcFeqsFct(feq,rho,vx1,vx2,vx3);
-					LBMReal vx1Sq = vx1*vx1;
-					LBMReal vx2Sq = vx2*vx2;
-					LBMReal vx3Sq = vx3*vx3;
+					real vx1Sq = vx1*vx1;
+					real vx2Sq = vx2*vx2;
+					real vx3Sq = vx3*vx3;
 					for (int dir = STARTF; dir < (ENDF+1); dir++)
 					{
-						LBMReal velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
-						LBMReal velSq1 = velProd*velProd;
-						LBMReal gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
+						real velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
+						real velSq1 = velProd*velProd;
+						real gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 
 						feq[dir] = rho*WEIGTH[dir]*(1 + 3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 						//geq[dir] = p1*WEIGTH[dir] + gamma;
 						//geq[dir] = p1*WEIGTH[dir]/(rho*UbMath::c1o3) + gamma*rho;
 						//geq[dir] = (p1*WEIGTH[dir]/(rho*UbMath::c1o3) + gamma*rho)*UbMath::c1o3;
-						geq[dir] = (gamma*rho)*UbMath::c1o3;
+						geq[dir] = (gamma*rho)* vf::lbm::constant::c1o3;
 					}
 
 
@@ -335,7 +336,7 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseInitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 {
-	double x1=1.0,x2=1.0,x3=1.0;
+	real x1=1.0,x2=1.0,x3=1.0;
 	fct.DefineVar("x1",&x1); 
 	fct.DefineVar("x2",&x2); 
 	fct.DefineVar("x3",&x3);
@@ -352,7 +353,7 @@ void MultiphaseInitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setNu( LBMReal nu )
+void MultiphaseInitDistributionsBlockVisitor::setNu( real nu )
 {
 	this->nu = nu;
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h
index 6ff60387daeef966da6143ef459fa7b7d247fbd5..6077a1294582ea423dd3c5fcb446dd841e65491a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h
@@ -45,7 +45,7 @@
 class MultiphaseInitDistributionsBlockVisitor : public Block3DVisitor
 {
 public:
-	typedef std::numeric_limits<LBMReal> D3Q27RealLim;
+	typedef std::numeric_limits<real> D3Q27RealLim;
 
 public:
 	MultiphaseInitDistributionsBlockVisitor();
@@ -56,7 +56,7 @@ public:
 	//! \param vx1 - velocity in x
 	//! \param vx2 - velocity in y
 	//! \param vx3 - velocity in z
-	MultiphaseInitDistributionsBlockVisitor( LBMReal densityRatio, LBMReal vx1=0.0, LBMReal vx2=0.0, LBMReal vx3=0.0, LBMReal rho=0.0);
+	MultiphaseInitDistributionsBlockVisitor( real densityRatio, real vx1=0.0, real vx2=0.0, real vx3=0.0, real rho=0.0);
 	//////////////////////////////////////////////////////////////////////////
 	//automatic vars are: x1,x2, x3
 	//ussage example: setVx1("x1*0.01+x2*0.003")
@@ -74,12 +74,12 @@ public:
 	void setPhi( const std::string& muParserString);
 
 	//////////////////////////////////////////////////////////////////////////
-	void setVx1( LBMReal vx1 );
-	void setVx2( LBMReal vx2 );
-	void setVx3( LBMReal vx3 );
-	void setRho( LBMReal rho );
-	void setPhi( LBMReal rho );
-	void setNu( LBMReal nu );
+	void setVx1( real vx1 );
+	void setVx2( real vx2 );
+	void setVx3( real vx3 );
+	void setRho( real rho );
+	void setPhi( real rho );
+	void setNu( real nu );
 
 	void visit(SPtr<Grid3D> grid, SPtr<Block3D> block);
 
@@ -93,8 +93,8 @@ private:
 	mu::Parser muRho;
 	mu::Parser muPhi;
 
-	LBMReal nu;
-	LBMReal densityRatio;
+	real nu;
+	real densityRatio;
 };
 
 #endif //D3Q27INITDISTRIBUTIONSPATCHVISITOR_H
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp
index 4990690e2d7d464cfbdc69f2966655568021e7d0..8885b09ea0e19d56bce205334263a7b5c1f16313 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp
@@ -19,7 +19,7 @@
 //   }
 //}
 //////////////////////////////////////////////////////////////////////////
-MultiphaseSetKernelBlockVisitor::MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nuL, LBMReal nuG, double availMem, double needMem, MultiphaseSetKernelBlockVisitor::Action action /*= SetKernelBlockVisitor::New*/) :
+MultiphaseSetKernelBlockVisitor::MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nuL, real nuG, real availMem, real needMem, MultiphaseSetKernelBlockVisitor::Action action /*= SetKernelBlockVisitor::New*/) :
 	Block3DVisitor(0, D3Q27System::MAXLEVEL), kernel(kernel), nuL(nuL), nuG(nuG), action(action), dataSetFlag(true)
 {
 	if (needMem > availMem)
@@ -32,8 +32,8 @@ void MultiphaseSetKernelBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> blo
 {
 	if(kernel && (block->getRank() == grid->getRank()))
 	{
-		LBMReal collFactorL = LBMSystem::calcCollisionFactor(nuL, block->getLevel());
-		LBMReal collFactorG = LBMSystem::calcCollisionFactor(nuG, block->getLevel());
+		real collFactorL = LBMSystem::calcCollisionFactor(nuL, block->getLevel());
+		real collFactorG = LBMSystem::calcCollisionFactor(nuG, block->getLevel());
 		kernel->setCollisionFactorMultiphase(collFactorL, collFactorG);
 
 		kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h
index 24d2b35c3a85b80e793b94d61feceb58b607ff19..566419d7f6f8a1e87a946e748e725ec1624d29ce 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h
@@ -42,7 +42,7 @@ class MultiphaseSetKernelBlockVisitor : public Block3DVisitor
 public:
 	enum Action { NewKernel, ChangeKernel, ChangeKernelWithData};
 public:
-	MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nuL, LBMReal nuG, double availMem, double needMem, 
+	MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nuL, real nuG, real availMem, real needMem, 
 		MultiphaseSetKernelBlockVisitor::Action action = MultiphaseSetKernelBlockVisitor::NewKernel);
 
 	virtual ~MultiphaseSetKernelBlockVisitor() {}
@@ -53,8 +53,8 @@ public:
 
 private:
 	SPtr<LBMKernel> kernel;
-	LBMReal nuL;
-	LBMReal nuG;
+	real nuL;
+	real nuG;
 	Action action;
 	bool dataSetFlag;
 };
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp
index 28b035f71f7ab83f4ef33188e0d265588835d0eb..0c3fd6de0de70a3506cc2a9a00be80c267ad538a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp
@@ -121,31 +121,31 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPressure(const std:
 	this->checkFunction(muPressure);
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx1( LBMReal vx1 ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx1( real vx1 ) 
 { 
 	this->muVx1.SetExpr( UbSystem::toString(vx1,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx1); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx2( LBMReal vx2 ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx2( real vx2 ) 
 { 
 	this->muVx2.SetExpr( UbSystem::toString(vx2,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx2); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx3( LBMReal vx3 ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx3( real vx3 ) 
 { 
 	this->muVx3.SetExpr( UbSystem::toString(vx3,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx3); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setRho( LBMReal rho ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setRho( real rho ) 
 { 
 	this->muRho.SetExpr( UbSystem::toString(rho,D3Q27RealLim::digits10) );  
 	this->checkFunction(muRho); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPhi( LBMReal phi ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPhi( real phi ) 
 { 
 	this->muPhi.SetExpr( UbSystem::toString(phi,D3Q27RealLim::digits10) );  
 	this->checkFunction(muPhi); 
@@ -154,6 +154,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPhi( LBMReal phi )
 void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D> block) 
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	if(!block) UB_THROW( UbException(UB_EXARGS,"block is not exist") );
 
@@ -187,7 +188,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
 		//LBMReal phiL = kernel->getPhiL();
 		//LBMReal phiH = kernel->getPhiH();
 
-		LBMReal f[D3Q27System::ENDF+1];
+		real f[D3Q27System::ENDF+1];
 
 		for(int ix3=0; ix3<(int)bcArray->getNX3(); ix3++)
             for (int ix2 = 0; ix2 < (int)bcArray->getNX2(); ix2++)
@@ -198,7 +199,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
                     x2              = coords[1];
                     x3              = coords[2];
 
-					LBMReal vx1 = 0, vx2 = 0, vx3 = 0, p1 = 0, phi = 0,pres=0;
+					real vx1 = 0, vx2 = 0, vx3 = 0, p1 = 0, phi = 0,pres=0;
 					//p1  = 0.0;
 					p1 = muRho.Eval();
 					vx1 = muVx1.Eval();
@@ -215,24 +216,24 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
 					//LBMReal rho = rhoH + (rhoH - rhoL)*(phi - phiH)/(phiH - phiL);
 
 			
-					LBMReal feq[27];
-					LBMReal geq[27];
+					real feq[27];
+					real geq[27];
 
 					//calcFeqsFct(feq,rho,vx1,vx2,vx3);
-					LBMReal vx1Sq = vx1*vx1;
-					LBMReal vx2Sq = vx2*vx2;
-					LBMReal vx3Sq = vx3*vx3;
+					real vx1Sq = vx1*vx1;
+					real vx2Sq = vx2*vx2;
+					real vx3Sq = vx3*vx3;
 					for (int dir = STARTF; dir < (ENDF+1); dir++)
 					{
-						LBMReal velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
-						LBMReal velSq1 = velProd*velProd;
-						LBMReal gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
+						real velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
+						real velSq1 = velProd*velProd;
+						real gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 
 						//feq[dir] = rho*WEIGTH[dir]*(1 + 3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 						feq[dir] =  WEIGTH[dir] * (1 + 3 * velProd + 4.5 * velSq1 - 1.5 * (vx1Sq + vx2Sq + vx3Sq));
 						//geq[dir] = p1*WEIGTH1[dir] + gamma;
 						//geq[dir] = p1*WEIGTH[dir]/(rho*UbMath::c1o3) + gamma*rho;
-						geq[dir] = p1 * WEIGTH[dir] / ( UbMath::c1o3) + gamma ;
+						geq[dir] = p1 * WEIGTH[dir] / (vf::lbm::constant::c1o3) + gamma ;
 					}
 
 
@@ -346,7 +347,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseVelocityFormInitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 {
-	double x1=1.0,x2=1.0,x3=1.0;
+	real x1=1.0,x2=1.0,x3=1.0;
 	fct.DefineVar("x1",&x1); 
 	fct.DefineVar("x2",&x2); 
 	fct.DefineVar("x3",&x3);
@@ -363,12 +364,12 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::checkFunction(mu::Pars
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setNu( LBMReal nu )
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setNu( real nu )
 {
 	this->nu = nu;
 }
 
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPressure(LBMReal pres)
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPressure(real pres)
 {
 	this->muPressure.SetExpr(UbSystem::toString(pres, D3Q27RealLim::digits10));
 	this->checkFunction(muPressure);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h
index 553cfe75b6b881c96a0542f184bf50c88146babc..92ab5eff9af4559bf22893c9a8506362604606c3 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h
@@ -45,7 +45,7 @@
 class MultiphaseVelocityFormInitDistributionsBlockVisitor : public Block3DVisitor
 {
 public:
-	typedef std::numeric_limits<LBMReal> D3Q27RealLim;
+	typedef std::numeric_limits<real> D3Q27RealLim;
 
 public:
 	MultiphaseVelocityFormInitDistributionsBlockVisitor();
@@ -75,13 +75,13 @@ public:
 	void setPressure(const std::string& muParserString);
 
 	//////////////////////////////////////////////////////////////////////////
-	void setVx1( LBMReal vx1 );
-	void setVx2( LBMReal vx2 );
-	void setVx3( LBMReal vx3 );
-	void setRho( LBMReal rho );
-	void setPhi( LBMReal rho );
-	void setNu( LBMReal nu );
-	void setPressure(LBMReal pres);
+	void setVx1( real vx1 );
+	void setVx2( real vx2 );
+	void setVx3( real vx3 );
+	void setRho( real rho );
+	void setPhi( real rho );
+	void setNu( real nu );
+	void setPressure(real pres);
 
 	void visit(SPtr<Grid3D> grid, SPtr<Block3D> block);
 
@@ -96,7 +96,7 @@ private:
 	mu::Parser muPhi;
 	mu::Parser muPressure;
 
-	LBMReal nu;
+	real nu;
 };
 
 #endif //D3Q27INITDISTRIBUTIONSPATCHVISITOR_H
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
index b0b0cf7743fd195796ef2fb3276a9a3921adf465..a73965641237c804cd094f399e582336e6be8e04 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
@@ -10,7 +10,7 @@
 
 RefineAroundGbObjectHelper::RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int refineLevel,
                                                        SPtr<D3Q27TriFaceMeshInteractor> objectIter,
-                                                       double startDistance, double stopDistance,
+                                                       real startDistance, real stopDistance,
                                                        std::shared_ptr<vf::mpi::Communicator> comm)
     : grid(grid), refineLevel(refineLevel), objectIter(objectIter), startDistance(startDistance),
       stopDistance(stopDistance), comm(comm)
@@ -21,6 +21,8 @@ RefineAroundGbObjectHelper::~RefineAroundGbObjectHelper(void) = default;
 //////////////////////////////////////////////////////////////////////////
 void RefineAroundGbObjectHelper::refine()
 {
+    using namespace vf::lbm::dir;
+
     UBLOG(logDEBUG5, "RefineCrossAndInsideGbObjectHelper: refine - start");
 
     int rank = grid->getRank();
@@ -38,7 +40,7 @@ void RefineAroundGbObjectHelper::refine()
     grid->accept(overlapVisitor);
 
     std::vector<int> dirs;
-    for (int i = D3Q27System::DIR_P00; i <= D3Q27System::DIR_0MP; i++) {
+    for (int i = (int)DIR_P00; i <= (int)DIR_0MP; i++) {
         dirs.push_back(i);
     }
     SetInterpolationDirsBlockVisitor interDirsVisitor(dirs);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
index 0421a963e6d57da5096370eed9721220c98939b4..76874ce767294efa318bb7e8b9f8b4d2e2a348eb 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
@@ -2,6 +2,7 @@
 #define RefineAroundGbObjectHelper_H
 
 #include <PointerDefinitions.h>
+#include "lbm/constants/D3Q27.h"
 
 class Grid3D;
 namespace vf::mpi {class Communicator;}
@@ -20,7 +21,7 @@ public:
     //! \param startDistance start distance from geometry for refinement
     //! \param stopDistance stop distance from geometry for refinement
     RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, SPtr<D3Q27TriFaceMeshInteractor> objectIter,
-                               double startDistance, double stopDistance, std::shared_ptr<vf::mpi::Communicator> comm);
+                               real startDistance, real stopDistance, std::shared_ptr<vf::mpi::Communicator> comm);
     virtual ~RefineAroundGbObjectHelper();
     //! start refinement
     void refine();
@@ -29,7 +30,7 @@ private:
     SPtr<Grid3D> grid;
     SPtr<D3Q27TriFaceMeshInteractor> objectIter;
     int refineLevel;
-    double startDistance, stopDistance;
+    real startDistance, stopDistance;
     std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
index 3bb1546896ee40ecdb9acf69586251ad1f03bb62..52c7c3ac1204a96fe7db3089ef2eb3ecc93ac143 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
@@ -55,7 +55,7 @@ void RefineCrossAndInsideGbObjectHelper::refine()
 
     std::vector<int> dirs;
 
-    for (int i = D3Q27System::STARTDIR; i <= D3Q27System::ENDDIR; i++) {
+    for (int i = D3Q27System::FSTARTDIR; i <= D3Q27System::FENDDIR; i++) {
         dirs.push_back(i);
     }
     SetInterpolationDirsBlockVisitor interDirsVisitor(dirs);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp
index 7fa6a6283c1a97d07f55405ad3b00af55f1d7690..b1a9a3b399dd0e3a8538187165af079e6bd3fdc5 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp
@@ -3,6 +3,7 @@
 #include "Block3D.h"
 #include "Grid3D.h"
 #include <geometry3d/GbObject3D.h>
+#include "lbm/constants/D3Q27.h"
 
 RefineInterGbObjectsBlockVisitor::RefineInterGbObjectsBlockVisitor() : Block3DVisitor(-1, -1) {}
 //////////////////////////////////////////////////////////////////////////
@@ -29,12 +30,12 @@ void RefineInterGbObjectsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> bl
     UbTupleDouble3 coords = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 delta  = grid->getBlockLengths(block);
 
-    double cellMinX1 = val<1>(coords);
-    double cellMinX2 = val<2>(coords);
-    double cellMinX3 = val<3>(coords);
-    double cellMaxX1 = val<1>(coords) + val<1>(delta);
-    double cellMaxX2 = val<2>(coords) + val<2>(delta);
-    double cellMaxX3 = val<3>(coords) + val<3>(delta);
+    real cellMinX1 = val<1>(coords);
+    real cellMinX2 = val<2>(coords);
+    real cellMinX3 = val<3>(coords);
+    real cellMaxX1 = val<1>(coords) + val<1>(delta);
+    real cellMaxX2 = val<2>(coords) + val<2>(delta);
+    real cellMaxX3 = val<3>(coords) + val<3>(delta);
 
     bool insideInclude = false;
     for (size_t i = 0; i < includeGbObjects3D.size(); i++) {
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
index 7930219451088aaef222d06c1a5a72d159817798..ae214c77ca425dde4ecde31f7dc88d19a1616555 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
@@ -104,6 +104,8 @@ void SetConnectorsBlockVisitor<T1, T2>::visit(SPtr<Grid3D> grid, SPtr<Block3D> b
 template <class T1, class T2>
 void SetConnectorsBlockVisitor<T1, T2>::setSameLevelConnectors(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     UBLOG(logDEBUG5, "SetConnectorsBlockVisitor::setSameLevelConnectors() - start");
     int blockRank = block->getRank();
     if (gridRank == blockRank && block->isActive()) {
@@ -114,7 +116,7 @@ void SetConnectorsBlockVisitor<T1, T2>::setSameLevelConnectors(SPtr<Grid3D> grid
         int ix3   = block->getX3();
         int level = block->getLevel();
 
-        for (int dir = D3Q27System::STARTDIR; dir <= D3Q27System::ENDDIR; dir++) {
+        for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++) { 
             SPtr<Block3D> neighBlock = grid->getNeighborBlock(dir, ix1, ix2, ix3, level);
 
             if (neighBlock) {
@@ -126,7 +128,7 @@ void SetConnectorsBlockVisitor<T1, T2>::setSameLevelConnectors(SPtr<Grid3D> grid
                 } else if (blockRank != neighBlockRank && neighBlock->isActive()) {
                     setRemoteConnectors(block, neighBlock, dir);
 
-                    if (dir >= D3Q27System::DIR_P00 && dir <= D3Q27System::DIR_00M) {
+                    if (dir >= (int)DIR_P00 && dir <= (int)DIR_00M) {
                         int weight = block->getWeight(neighBlockRank);
                         weight++;
                         block->setWeight(neighBlockRank, weight);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp
index abf828a06e0ec83b492ff9107be4a9a3c4445674..a325fc5ac355e31a1cf1188b884d5e719e85c7be 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp
@@ -4,7 +4,7 @@
 #include "D3Q27System.h"
 #include "LBMSystem.h"
 
-SetForcingBlockVisitor::SetForcingBlockVisitor(LBMReal forcingX1, LBMReal forcingX2, LBMReal forcingX3)
+SetForcingBlockVisitor::SetForcingBlockVisitor(real forcingX1, real forcingX2, real forcingX3)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), forcingX1(forcingX1), forcingX2(forcingX2), forcingX3(forcingX3)
 {
     ftype = 0;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h
index a6d13c2a702f7ceca6122a78dda1b34f63caf376..e7237d7fc7833f69aa486858527a167f53864afe 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h
@@ -13,7 +13,7 @@ class Grid3D;
 class SetForcingBlockVisitor : public Block3DVisitor
 {
 public:
-    SetForcingBlockVisitor(LBMReal forcingX1, LBMReal forcingX2, LBMReal forcingX3);
+    SetForcingBlockVisitor(real forcingX1, real forcingX2, real forcingX3);
 
     SetForcingBlockVisitor(const mu::Parser &muForcingX1, const mu::Parser &muForcingX2, const mu::Parser &muForcingX3);
 
@@ -25,9 +25,9 @@ public:
 
 private:
     int ftype;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
     mu::Parser muForcingX1;
     mu::Parser muForcingX2;
     mu::Parser muForcingX3;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
index 54f46c811d4c2d065bbda7232bd4e32f24559c22..7ff7d20b9fcd85b6939f3184fcde86c6fdaae77d 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
@@ -42,7 +42,7 @@
 #include <mpi/Communicator.h>
 #include "InterpolationProcessor.h"
 
-SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor) :
+SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, real nue, SPtr<InterpolationProcessor> iProcessor) :
 Block3DVisitor(0, D3Q27System::MAXLEVEL), 
 	comm(comm),
 	nue(nue),
@@ -72,6 +72,8 @@ void SetInterpolationConnectorsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block
 //////////////////////////////////////////////////////////////////////////
 void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+	using namespace vf::lbm::dir;
+
    UBLOG(logDEBUG5, "SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors() - start");
 
 	//search for all blocks with different ranks
@@ -82,251 +84,251 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Gri
 		int fbx3 = block->getX3() << 1;
 		int level = block->getLevel() + 1;
 
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+		if( block->hasInterpolationFlagCF(DIR_P00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_P00);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_P00);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+		if( block->hasInterpolationFlagCF(DIR_M00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_M00);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_M00);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0P0))
+		if( block->hasInterpolationFlagCF(DIR_0P0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0P0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0P0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+		if( block->hasInterpolationFlagCF(DIR_0M0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0M0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0M0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_00P);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_00P);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_00M);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_00M);
 		}
 
 		//////NE-NW-SE-SW
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_PP0)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+		if( block->hasInterpolationFlagCF(DIR_PP0)&&!block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_P00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2+1,fbx3+0,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PP0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PP0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_MM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+		if( block->hasInterpolationFlagCF(DIR_MM0)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_0M0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MM0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MM0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_PM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+		if( block->hasInterpolationFlagCF(DIR_PM0)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_0M0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2,fbx3+0,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PM0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PM0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_MP0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+		if( block->hasInterpolationFlagCF(DIR_MP0)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_M00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MP0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MP0);
 		}
 
 		/////////TE-BW-BE-TW 1-0
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_P0P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_P0P)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2+0,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_P0P);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_P0P);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_M0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_M0M)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+0,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_M0M);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_M0M);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_P0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_P0M)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2+0,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_P0M);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_P0M);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_M0P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_M0P)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+0,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_M0P);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_M0P);
 		}
 
 		//////TN-BS-BN-TS
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0PP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_0PP)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2+1,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0PP);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0PP);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0MM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_0MM)&& !block->hasInterpolationFlagCF(DIR_0M0) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0MM);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0MM);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0PM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_0PM)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0PM);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0PM);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0MP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_0MP)&& !block->hasInterpolationFlagCF(DIR_0M0) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0MP);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0MP);
 		}
 
 
 
 
       //////corners
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PPP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_P0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0PP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_PP0)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+      if (block->hasInterpolationFlagCF(DIR_PPP)&&!block->hasInterpolationFlagCF(DIR_P0P)&&!block->hasInterpolationFlagCF(DIR_0PP)&&!block->hasInterpolationFlagCF(DIR_PP0)&&!block->hasInterpolationFlagCF(DIR_00P)&&!block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_P00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PPP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PPP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MMP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_M0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0MP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_MMP)&&!block->hasInterpolationFlagCF(DIR_M0P)&&!block->hasInterpolationFlagCF(DIR_0MP)&& !block->hasInterpolationFlagCF(DIR_MM0)&& !block->hasInterpolationFlagCF(DIR_00P)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MMP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MMP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PMP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_P0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0MP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_PM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_PMP)&&!block->hasInterpolationFlagCF(DIR_P0P)&&!block->hasInterpolationFlagCF(DIR_0MP)&& !block->hasInterpolationFlagCF(DIR_PM0)&& !block->hasInterpolationFlagCF(DIR_00P)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PMP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PMP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MPP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_M0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0PP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MP0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+      if (block->hasInterpolationFlagCF(DIR_MPP)&&!block->hasInterpolationFlagCF(DIR_M0P)&&!block->hasInterpolationFlagCF(DIR_0PP)&& !block->hasInterpolationFlagCF(DIR_MP0)&& !block->hasInterpolationFlagCF(DIR_00P)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_M00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MPP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MPP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PPM)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_P0M)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0PM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_PP0)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+      if (block->hasInterpolationFlagCF(DIR_PPM)&&!block->hasInterpolationFlagCF(DIR_P0M)&&!block->hasInterpolationFlagCF(DIR_0PM)&& !block->hasInterpolationFlagCF(DIR_PP0)&&!block->hasInterpolationFlagCF(DIR_00M)&&!block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_P00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PPM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PPM);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MMM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0MM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_MMM)&& !block->hasInterpolationFlagCF(DIR_0MM)&& !block->hasInterpolationFlagCF(DIR_M0M)&& !block->hasInterpolationFlagCF(DIR_MM0)&& !block->hasInterpolationFlagCF(DIR_00M)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2, fbx3+0, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MMM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MMM);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PMM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0MM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_PM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_PMM)&& !block->hasInterpolationFlagCF(DIR_0MM)&& !block->hasInterpolationFlagCF(DIR_P0M)&& !block->hasInterpolationFlagCF(DIR_PM0)&& !block->hasInterpolationFlagCF(DIR_00M)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2, fbx3, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PMM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PMM);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MPM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0PM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MP0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+      if (block->hasInterpolationFlagCF(DIR_MPM)&& !block->hasInterpolationFlagCF(DIR_0PM)&& !block->hasInterpolationFlagCF(DIR_M0M)&& !block->hasInterpolationFlagCF(DIR_MP0)&& !block->hasInterpolationFlagCF(DIR_00M)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_M00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MPM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MPM);
       }
 
 	}
@@ -343,12 +345,12 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Blo
 	if(fBlockNE) fBlockNERank = fBlockNE->getRank();
 	int cBlockRank   = cBlock->getRank();
 
-	LBMReal omegaF {0.0};
+	real omegaF {0.0};
 	if(fBlockSW) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockSW->getLevel());
 	if(fBlockNW) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockNW->getLevel());
 	if(fBlockSE) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockSE->getLevel());
 	if(fBlockNE) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockNE->getLevel());
-	LBMReal omegaC = LBMSystem::calcCollisionFactor(nue, cBlock->getLevel());
+	real omegaC = LBMSystem::calcCollisionFactor(nue, cBlock->getLevel());
 	iProcessor->setOmegas(omegaC, omegaF);
 
 	InterpolationProcessorPtr cIProcessor(iProcessor->clone());
@@ -373,7 +375,7 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Blo
 
 	if(cBlockRank == gridRank)
 	{
-      SPtr<Block3DConnector> connector(new CoarseToFineVectorConnector< TbTransmitter< CbVector< LBMReal > > >(cBlock,
+      SPtr<Block3DConnector> connector(new CoarseToFineVectorConnector< TbTransmitter< CbVector< real > > >(cBlock,
 			senderCFevenEvenSW, receiverCFevenEvenSW, senderCFevenOddNW,  receiverCFevenOddNW, 
 			senderCFoddEvenSE,  receiverCFoddEvenSE,  senderCFoddOddNE,   receiverCFoddOddNE, 
 			dir, cIProcessor) );
@@ -381,25 +383,25 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Blo
 	}
 	if(fBlockSW && fBlockSWRank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockSW, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockSW, 
 			senderFCevenEvenSW, receiverFCevenEvenSW, dir, fIProcessorSW, EvenEvenSW) );
 		fBlockSW->setConnector(connector);
 	}
 	if(fBlockNW && fBlockNWRank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockNW, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockNW, 
 			senderFCevenOddNW, receiverFCevenOddNW, dir, fIProcessorNW, EvenOddNW) );
 		fBlockNW->setConnector(connector);
 	}
 	if(fBlockSE && fBlockSERank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockSE, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockSE, 
 			senderFCoddEvenSE, receiverFCoddEvenSE, dir, fIProcessorSE, OddEvenSE) );
 		fBlockSE->setConnector(connector);
 	}
 	if(fBlockNE && fBlockNERank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockNE, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockNE, 
 			senderFCoddOddNE, receiverFCoddOddNE, dir, fIProcessorNE, OddOddNE) );
 		fBlockNE->setConnector(connector);
 	}
@@ -419,8 +421,8 @@ void SetInterpolationConnectorsBlockVisitor::createTransmitters(SPtr<Block3D> cB
 	int cBlockRank = cBlock->getRank();
 	if(fBlockRank == cBlockRank && fBlockRank == gridRank)
 	{
-		senderCF = receiverFC = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< LBMReal > >());
-		senderFC = receiverCF = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< LBMReal > >());
+		senderCF = receiverFC = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< real > >());
+		senderFC = receiverCF = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< real > >());
 	}
 	else if(cBlockRank == gridRank)
 	{
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
index 7ae54b0b62cadbc58eb5b0cc804f00a977d47615..c30d87ecaa042a30d931e29ba185fdd5230e2a68 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
@@ -50,7 +50,7 @@ class InterpolationProcessor;
 class SetInterpolationConnectorsBlockVisitor : public Block3DVisitor
 {
 public:
-    SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor);
+    SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, real nue, SPtr<InterpolationProcessor> iProcessor);
     ~SetInterpolationConnectorsBlockVisitor() override;
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
     //////////////////////////////////////////////////////////////////////////
@@ -65,7 +65,7 @@ protected:
                             CreateTransmittersHelper::TransmitterPtr &receiverFC);
     std::shared_ptr<vf::mpi::Communicator> comm;
     int gridRank;
-    LBMReal nue;
+    real nue;
     SPtr<InterpolationProcessor> iProcessor;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp
index 689d84d0754f74c2f680fd2b7aa22ec0c54008c1..dbb85c0b848d2c5dc89e99f72d4091476eb31790 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp
@@ -11,6 +11,8 @@ SetInterpolationDirsBlockVisitor::SetInterpolationDirsBlockVisitor(std::vector<i
 //////////////////////////////////////////////////////////////////////////
 void SetInterpolationDirsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     int ix1, ix2, ix3, level;
     ix1   = block->getX1();
     ix2   = block->getX2();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp
index 354a577e701f9d017181e6006833ad40749eef60..54271370c11700886f969eeef75a2389ef062828 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp
@@ -44,7 +44,7 @@
 #include <utility>
 
 //////////////////////////////////////////////////////////////////////////
-SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, double availMem, double needMem,
+SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, real availMem, real needMem,
                                              SetKernelBlockVisitor::Action action)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), kernel(std::move(kernel)), nue(nue), action(action), dataSetFlag(true)
 {
@@ -53,7 +53,7 @@ SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue
     }
 }
 
-SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, int numberOfProcesses,
+SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, int numberOfProcesses,
                                              SetKernelBlockVisitor::Action action)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), kernel(std::move(kernel)), nue(nue), action(action), dataSetFlag(true),
       numberOfProcesses(numberOfProcesses)
@@ -66,7 +66,7 @@ void SetKernelBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
     throwExceptionIfNotEnoughMemory(grid);
 
     if (kernel && (block->getRank() == grid->getRank())) {
-        LBMReal collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
+        real collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
         kernel->setCollisionFactor(collFactor);
         kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
         kernel->setDeltaT(LBMSystem::getDeltaT(block->getLevel()));
@@ -122,7 +122,7 @@ void SetKernelBlockVisitor::throwExceptionIfNotEnoughMemory(const SPtr<Grid3D> &
         throw UbException(UB_EXARGS, "SetKernelBlockVisitor: Not enough memory!!!");
 }
 
-double SetKernelBlockVisitor::getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const
+real SetKernelBlockVisitor::getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const
 {
     unsigned long long numberOfNodesPerBlockWithGhostLayer;
     auto numberOfBlocks = (unsigned long long)grid->getNumberOfBlocks();
@@ -133,7 +133,7 @@ double SetKernelBlockVisitor::getRequiredPhysicalMemory(const SPtr<Grid3D> &grid
                                           (val<2>(blockNx) + ghostLayer) * (val<3>(blockNx) + ghostLayer);
 
     auto needMemAll =
-        double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
+        real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
 
-    return needMemAll / double(numberOfProcesses);
+    return needMemAll / real(numberOfProcesses);
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h
index 1e0621f22379e52701aafa4ab06f858cb1247d7e..29685f49908e5266dc5307f17da42c9e8c874491 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h
@@ -49,10 +49,10 @@ class SetKernelBlockVisitor : public Block3DVisitor
 public:
     enum Action { NewKernel, ChangeKernel, ChangeKernelWithData };
 
-    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, double availMem, double needMem,
+    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, real availMem, real needMem,
                           SetKernelBlockVisitor::Action action = SetKernelBlockVisitor::NewKernel);
 
-    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, int numberOfProcesses,
+    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, int numberOfProcesses,
                           SetKernelBlockVisitor::Action action = SetKernelBlockVisitor::NewKernel);
 
     ~SetKernelBlockVisitor() override = default;
@@ -63,13 +63,13 @@ public:
 
 private:
     SPtr<LBMKernel> kernel;
-    LBMReal nue;
+    real nue;
     Action action;
     bool dataSetFlag;
 
     int numberOfProcesses{ 1 };
 
-    double getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const;
+    real getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const;
 
     void throwExceptionIfNotEnoughMemory(const SPtr<Grid3D> &grid);
 };
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp
index 3b9eb9493fe0fa66f05fdd3ea42505604836d218..bb6cc5dc6fe06b1a63647d83897bc1fe83066a1a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp
@@ -15,6 +15,8 @@ SetUndefinedNodesBlockVisitor::SetUndefinedNodesBlockVisitor(bool twoTypeOfConne
 //////////////////////////////////////////////////////////////////////////
 void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     if (!block->hasInterpolationFlag())
         return;
 
@@ -40,7 +42,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
     // int offset = 2;
     int offset = 3;
 
-    if (block->hasInterpolationFlag(D3Q27System::DIR_P00)) {
+    if (block->hasInterpolationFlag(DIR_P00)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -51,7 +53,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_M00)) {
+    if (block->hasInterpolationFlag(DIR_M00)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -62,7 +64,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0P0)) {
+    if (block->hasInterpolationFlag(DIR_0P0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2;
@@ -73,7 +75,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0M0)) {
+    if (block->hasInterpolationFlag(DIR_0M0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -84,7 +86,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_00P)) {
+    if (block->hasInterpolationFlag(DIR_00P)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -95,7 +97,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_00M)) {
+    if (block->hasInterpolationFlag(DIR_00M)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -106,7 +108,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PP0)) {
+    if (block->hasInterpolationFlag(DIR_PP0)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -119,7 +121,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MM0)) {
+    if (block->hasInterpolationFlag(DIR_MM0)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -132,7 +134,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PM0)) {
+    if (block->hasInterpolationFlag(DIR_PM0)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -145,7 +147,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MP0)) {
+    if (block->hasInterpolationFlag(DIR_MP0)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -158,7 +160,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_P0P)) {
+    if (block->hasInterpolationFlag(DIR_P0P)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -171,7 +173,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_M0M)) {
+    if (block->hasInterpolationFlag(DIR_M0M)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -184,7 +186,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_P0M)) {
+    if (block->hasInterpolationFlag(DIR_P0M)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -197,7 +199,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_M0P)) {
+    if (block->hasInterpolationFlag(DIR_M0P)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -210,7 +212,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0PP)) {
+    if (block->hasInterpolationFlag(DIR_0PP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2;
@@ -223,7 +225,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0MM)) {
+    if (block->hasInterpolationFlag(DIR_0MM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -236,7 +238,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0PM)) {
+    if (block->hasInterpolationFlag(DIR_0PM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2;
@@ -249,7 +251,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0MP)) {
+    if (block->hasInterpolationFlag(DIR_0MP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -262,7 +264,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PPP)) {
+    if (block->hasInterpolationFlag(DIR_PPP)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -277,7 +279,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MPP)) {
+    if (block->hasInterpolationFlag(DIR_MPP)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -292,7 +294,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PMP)) {
+    if (block->hasInterpolationFlag(DIR_PMP)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -307,7 +309,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MMP)) {
+    if (block->hasInterpolationFlag(DIR_MMP)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -322,7 +324,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PPM)) {
+    if (block->hasInterpolationFlag(DIR_PPM)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -337,7 +339,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MPM)) {
+    if (block->hasInterpolationFlag(DIR_MPM)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -352,7 +354,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PMM)) {
+    if (block->hasInterpolationFlag(DIR_PMM)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -367,7 +369,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MMM)) {
+    if (block->hasInterpolationFlag(DIR_MMM)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -395,7 +397,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
     maxX2 = static_cast<int>(bcMatrix->getNX2()) - 1 - ll;
     maxX3 = static_cast<int>(bcMatrix->getNX3()) - 1 - ll;
 
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_P00)) {
+    if (block->hasInterpolationFlagFC(DIR_P00)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -404,7 +406,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_M00)) {
+    if (block->hasInterpolationFlagFC(DIR_M00)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -413,7 +415,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0P0)) {
+    if (block->hasInterpolationFlagFC(DIR_0P0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -422,7 +424,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0M0)) {
+    if (block->hasInterpolationFlagFC(DIR_0M0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -431,7 +433,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_00P)) {
+    if (block->hasInterpolationFlagFC(DIR_00P)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -440,7 +442,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_00M)) {
+    if (block->hasInterpolationFlagFC(DIR_00M)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -449,7 +451,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PP0)) {
+    if (block->hasInterpolationFlagFC(DIR_PP0)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -458,7 +460,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MM0)) {
+    if (block->hasInterpolationFlagFC(DIR_MM0)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -467,7 +469,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PM0)) {
+    if (block->hasInterpolationFlagFC(DIR_PM0)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -476,7 +478,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MP0)) {
+    if (block->hasInterpolationFlagFC(DIR_MP0)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = maxX2 - offset2;
@@ -485,7 +487,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_P0P)) {
+    if (block->hasInterpolationFlagFC(DIR_P0P)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -494,7 +496,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_M0M)) {
+    if (block->hasInterpolationFlagFC(DIR_M0M)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -503,7 +505,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_P0M)) {
+    if (block->hasInterpolationFlagFC(DIR_P0M)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -512,7 +514,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_M0P)) {
+    if (block->hasInterpolationFlagFC(DIR_M0P)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -521,7 +523,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0PP)) {
+    if (block->hasInterpolationFlagFC(DIR_0PP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -530,7 +532,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0MM)) {
+    if (block->hasInterpolationFlagFC(DIR_0MM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -539,7 +541,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0PM)) {
+    if (block->hasInterpolationFlagFC(DIR_0PM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -548,7 +550,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0MP)) {
+    if (block->hasInterpolationFlagFC(DIR_0MP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -557,7 +559,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PPP)) {
+    if (block->hasInterpolationFlagFC(DIR_PPP)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -566,7 +568,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MPP)) {
+    if (block->hasInterpolationFlagFC(DIR_MPP)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = maxX2 - offset2;
@@ -575,7 +577,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PMP)) {
+    if (block->hasInterpolationFlagFC(DIR_PMP)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -584,7 +586,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MMP)) {
+    if (block->hasInterpolationFlagFC(DIR_MMP)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -593,7 +595,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PPM)) {
+    if (block->hasInterpolationFlagFC(DIR_PPM)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -602,7 +604,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MPM)) {
+    if (block->hasInterpolationFlagFC(DIR_MPM)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = maxX2 - offset2;
@@ -611,7 +613,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PMM)) {
+    if (block->hasInterpolationFlagFC(DIR_PMM)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -620,7 +622,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MMM)) {
+    if (block->hasInterpolationFlagFC(DIR_MMM)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -633,10 +635,10 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
     // invert scaleCF blocks
     if (block->hasInterpolationFlagCF()) {
         if (block->hasInterpolationFlagFC() && twoTypeOfConnectorsCheck) {
-            for (int i = D3Q27System::DIR_P00; i <= D3Q27System::DIR_MMM; i++) {
+            for (int i = (int)DIR_P00; i <= (int)DIR_MMM; i++) {
                 UBLOG(logINFO, "FC in dir=" << i << " " << block->hasInterpolationFlagFC(i));
             }
-            for (int i = D3Q27System::DIR_P00; i <= D3Q27System::DIR_MMM; i++) {
+            for (int i = (int)DIR_P00; i <= (int)DIR_MMM; i++) {
                 UBLOG(logINFO, "CF in dir=" << i << " " << block->hasInterpolationFlagCF(i));
             }
             throw UbException(UB_EXARGS, "block " + block->toString() + " has CF and FC");
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
index 1ef34e0e8bbf54625efbc946b141cf16f24c213d..6183024279ce1753f2fd78bf20b72313b84662f1 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
@@ -13,7 +13,7 @@
 
 using namespace std;
 
-SpongeLayerBlockVisitor::SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, double nue,
+SpongeLayerBlockVisitor::SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, real nue,
                                                  int dir)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), boundingBox(boundingBox), kernel(kernel), nue(nue), dir(dir)
 {
@@ -23,6 +23,8 @@ SpongeLayerBlockVisitor::~SpongeLayerBlockVisitor() = default;
 //////////////////////////////////////////////////////////////////////////
 void SpongeLayerBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     if (!boundingBox) {
         UB_THROW(UbException(UB_EXARGS, "The bounding box isn't set!"));
     }
@@ -33,15 +35,15 @@ void SpongeLayerBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
         UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
         UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
 
-        double minX1 = val<1>(org);
-        double minX2 = val<2>(org);
-        double minX3 = val<3>(org);
-        double maxX1 = val<1>(org) + val<1>(blockLengths);
-        double maxX2 = val<2>(org) + val<2>(blockLengths);
-        double maxX3 = val<3>(org) + val<3>(blockLengths);
+        real minX1 = val<1>(org);
+        real minX2 = val<2>(org);
+        real minX3 = val<3>(org);
+        real maxX1 = val<1>(org) + val<1>(blockLengths);
+        real maxX2 = val<2>(org) + val<2>(blockLengths);
+        real maxX3 = val<3>(org) + val<3>(blockLengths);
 
         if (boundingBox->isCellInsideGbObject3D(minX1, minX2, minX3, maxX1, maxX2, maxX3)) {
-            LBMReal collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
+            real collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
             kernel->setCollisionFactor(collFactor);
             kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
             kernel->setDeltaT(LBMSystem::getDeltaT(block->getLevel()));
@@ -63,35 +65,35 @@ void SpongeLayerBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
             }
             newKernel->setBCProcessor(bcProc);
 
-            double oldCollFactor = newKernel->getCollisionFactor();
+            real oldCollFactor = newKernel->getCollisionFactor();
 
             UbTupleInt3 ixMin = grid->getBlockIndexes(boundingBox->getX1Minimum(), boundingBox->getX2Minimum(),
                                                       boundingBox->getX3Minimum());
             UbTupleInt3 ixMax = grid->getBlockIndexes(boundingBox->getX1Maximum(), boundingBox->getX2Maximum(),
                                                       boundingBox->getX3Maximum());
 
-            double newCollFactor;
+            real newCollFactor;
 
-            if (dir == D3Q27System::DIR_P00) {
+            if (dir == DIR_P00) {
                 int ibX1      = block->getX1();
                 int ibMax     = val<1>(ixMax) - val<1>(ixMin) + 1;
-                double index  = (double)(ibX1 - val<1>(ixMin) + 1);
-                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (double)(ibMax)*index;
-            } else if (dir == D3Q27System::DIR_M00) {
+                real index  = (real)(ibX1 - val<1>(ixMin) + 1);
+                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (real)(ibMax)*index;
+            } else if (dir == DIR_M00) {
                 int ibX1      = block->getX1();
                 int ibMax     = val<1>(ixMax) - val<1>(ixMin) + 1;
-                double index  = (double)(ibX1 - val<1>(ixMin) + 1);
-                newCollFactor = (oldCollFactor - 1.0) / (double)(ibMax)*index;
-            } else if (dir == D3Q27System::DIR_00P) {
+                real index  = (real)(ibX1 - val<1>(ixMin) + 1);
+                newCollFactor = (oldCollFactor - 1.0) / (real)(ibMax)*index;
+            } else if (dir == DIR_00P) {
                 int ibX3      = block->getX3();
                 int ibMax     = val<3>(ixMax) - val<3>(ixMin) + 1;
-                double index  = (double)(ibX3 - val<3>(ixMin) + 1);
-                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (double)(ibMax)*index;
-            } else if (dir == D3Q27System::DIR_00M) {
+                real index  = (real)(ibX3 - val<3>(ixMin) + 1);
+                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (real)(ibMax)*index;
+            } else if (dir == DIR_00M) {
                 int ibX3      = block->getX3();
                 int ibMax     = val<3>(ixMax) - val<3>(ixMin) + 1;
-                double index  = (double)(ibX3 - val<3>(ixMin) + 1);
-                newCollFactor = (oldCollFactor - 1.0) / (double)(ibMax)*index;
+                real index  = (real)(ibX3 - val<3>(ixMin) + 1);
+                newCollFactor = (oldCollFactor - 1.0) / (real)(ibMax)*index;
             } else
                 UB_THROW(UbException(UB_EXARGS, "Problem: no orthogonal sponge layer!"));
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h
index 0896db06577fa57f3ae3a137430c69eac214e24f..184a89eba969f3a6506c83758b79d11cfb8d3d60 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h
@@ -15,7 +15,7 @@ class LBMKernel;
 class SpongeLayerBlockVisitor : public Block3DVisitor
 {
 public:
-    SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, double nue, int dir);
+    SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, real nue, int dir);
     ~SpongeLayerBlockVisitor() override;
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
@@ -23,7 +23,7 @@ public:
 private:
     SPtr<GbCuboid3D> boundingBox;
     SPtr<LBMKernel> kernel;
-    double nue;
+    real nue;
     int dir;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp
index 311a8bf19786198e85b00eb500f6e7c90d2d5106..3dbe4d9c7c01a11f33d0f1e04a563ba1016a748b 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp
@@ -5,12 +5,12 @@
 #include "ILBMKernel.h"
 #include "LBMSystem.h"
 
-ViscosityBlockVisitor::ViscosityBlockVisitor(LBMReal nu) : Block3DVisitor(0, D3Q27System::MAXLEVEL), nu(nu) {}
+ViscosityBlockVisitor::ViscosityBlockVisitor(real nu) : Block3DVisitor(0, D3Q27System::MAXLEVEL), nu(nu) {}
 //////////////////////////////////////////////////////////////////////////
 void ViscosityBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
     if (block->getRank() == grid->getRank()) {
-        LBMReal collFactor = LBMSystem::calcCollisionFactor(nu, block->getLevel());
+        real collFactor = LBMSystem::calcCollisionFactor(nu, block->getLevel());
         block->getKernel()->setCollisionFactor(collFactor);
     }
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h
index 003bd41e1a02a547386cbde878564cf0b2bf8209..cb665f41c47f62d61584943bdf6f3ce64f84cc4f 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h
@@ -12,14 +12,14 @@ class Block3D;
 class ViscosityBlockVisitor : public Block3DVisitor
 {
 public:
-    ViscosityBlockVisitor(LBMReal nu);
+    ViscosityBlockVisitor(real nu);
 
     ~ViscosityBlockVisitor() override = default;
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
 private:
-    LBMReal nu;
+    real nu;
 };
 
 #endif
diff --git a/src/cpu/simulationconfig/src/Simulation.cpp b/src/cpu/simulationconfig/src/Simulation.cpp
index 1258df75b8440b468c942688c9dc3366e3e2a833..1fc777192d7b707ee28c1e1d2e8ae20d61df455b 100644
--- a/src/cpu/simulationconfig/src/Simulation.cpp
+++ b/src/cpu/simulationconfig/src/Simulation.cpp
@@ -120,7 +120,7 @@ void Simulation::run()
 
     auto metisVisitor = std::make_shared<MetisPartitioningGridVisitor>(communicator,
                                                                        MetisPartitioningGridVisitor::LevelBased,
-                                                                       D3Q27System::DIR_00M, MetisPartitioner::RECURSIVE);
+                                                                       vf::lbm::dir::DIR_00M, MetisPartitioner::RECURSIVE);
 
     InteractorsHelper intHelper(grid, metisVisitor);
     for (auto const &interactor : interactors)
diff --git a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
index fe4078af95904fa5e1580b54f3aa2edbb006bd3d..9c3bac9c3e2795fa99f339461c6a7f2d16448696 100644
--- a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
+++ b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
@@ -47,13 +47,13 @@ struct GKSGPU_EXPORT BoundaryCondition : virtual public BoundaryConditionStruct,
     virtual bool isWall() = 0;
 
     virtual bool isFluxBC();
-    
+
     virtual bool isInsulated();
 
     virtual bool secondCellsNeeded();
 
     virtual void runBoundaryConditionKernel( const SPtr<DataBase> dataBase,
-                                             const Parameters parameters, 
+                                             const Parameters parameters,
                                              const uint level ) = 0;
 
     BoundaryConditionStruct toStruct()
diff --git a/src/gpu/GksGpu/CMakeLists.txt b/src/gpu/GksGpu/CMakeLists.txt
index 5dbc533cc5f45c006c29a12242350f0433518bbf..6db6cbac1ff60c76986c3c22cc8017300d4f71ea 100644
--- a/src/gpu/GksGpu/CMakeLists.txt
+++ b/src/gpu/GksGpu/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(GksGpu LANGUAGES CUDA CXX)
 
-vf_add_library(PRIVATE_LINK basics lbmCuda GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX)
+vf_add_library(PRIVATE_LINK basics lbm GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX)
 
 target_include_directories(GksGpu PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
 
diff --git a/src/gpu/GksMeshAdapter/CMakeLists.txt b/src/gpu/GksMeshAdapter/CMakeLists.txt
index b9a2d12df4d0bee9396a706c6636b5f4056b2d3a..8ac5e69513eca94710797db1f971b2461336b769 100644
--- a/src/gpu/GksMeshAdapter/CMakeLists.txt
+++ b/src/gpu/GksMeshAdapter/CMakeLists.txt
@@ -1,3 +1,3 @@
 project(GksMeshAdapter LANGUAGES CUDA CXX)
 
-vf_add_library(PRIVATE_LINK basics GridGenerator lbmCuda)
+vf_add_library(PRIVATE_LINK basics GridGenerator lbm)
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5f3c4ad492b16c09b26acd00a624a54ad65dffda
--- /dev/null
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
@@ -0,0 +1,444 @@
+#include "TransientBCSetter.h"
+#include "GridGenerator/grid/Grid.h"
+#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+#include <logger/Logger.h>
+
+
+#include <math.h>
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <algorithm>
+
+// Factory for file collections: prefix is forwarded unchanged to the
+// collection's constructor. Only FileType::VTK is handled (yielding a
+// VTKFileCollection); every other value of type produces a null pointer.
+SPtr<FileCollection> createFileCollection(std::string prefix, FileType type)
+{
+    if(type == FileType::VTK)
+    {
+        return std::make_shared<VTKFileCollection>(prefix);
+    }
+
+    return nullptr;
+}
+// Factory for input file readers: the reader type follows from
+// fileCollection->getFileType(), so fileCollection must not be null.
+// Only FileType::VTK is handled (yielding a VTKReader for readLevel);
+// every other file type produces a null pointer.
+SPtr<TransientBCInputFileReader> createReaderForCollection(SPtr<FileCollection> fileCollection, uint readLevel)
+{
+    if(fileCollection->getFileType() != FileType::VTK)
+    {
+        return nullptr;
+    }
+    return std::make_shared<VTKReader>(std::static_pointer_cast<VTKFileCollection>(fileCollection), readLevel);
+}
+// Parses a whitespace-separated list of numbers from s into a vector of T.
+// Parsing stops at the first token that cannot be read as a T.
+template<typename T>
+std::vector<T> readStringToVector(std::string s)
+{
+    std::vector<T> out;
+    std::stringstream input(s);
+    T num; // extract directly as T; a float temporary would round integers above 2^24
+    while(input >> num)
+        out.push_back(num);
+    return out;
+}
+
+std::string readElement(std::string line)
+{
+    // The element name starts right after the first '<' and ends at the next blank.
+    const size_t nameBegin = line.find("<") + 1;
+    const size_t nameEnd = line.find(" ", nameBegin);
+    return line.substr(nameBegin, nameEnd - nameBegin);
+}
+
+std::string readAttribute(std::string line, std::string attributeName)
+{   // Returns the quoted value following attributeName in line, or "" if the name does not occur.
+    if(line.find(attributeName) == std::string::npos) return ""; // npos would otherwise wrap into a bogus start offset
+    size_t attributeStart = line.find(attributeName)+attributeName.size() + 2; // add 2 for '="'
+    return line.substr(attributeStart, line.find("\"", attributeStart)-attributeStart);
+}
+// Parses the file header: piece extent, origin, spacing and the name/offset of every DataArray; throws std::runtime_error if the file cannot be opened or the header is incomplete.
+void VTKFile::readHeader()
+{
+    //TODO make this more flexible
+    std::ifstream file(this->fileName);
+    if(!file.is_open()) throw std::runtime_error(std::string("VTKFile::readHeader(): could not open ") + this->fileName.c_str());
+    std::string line;
+
+    getline(file, line); // VTKFile
+    if(line.size()>1 && line[1]=='?') getline(file, line); // ignore first line if xml version
+
+    getline(file, line); // ImageData
+    std::vector<float> origin = readStringToVector<float>(readAttribute(line, "Origin"));
+    std::vector<float> spacing = readStringToVector<float>(readAttribute(line, "Spacing"));
+
+    getline(file, line); // Piece 
+    std::vector<int> pieceExtent = readStringToVector<int>(readAttribute(line, "Extent"));
+    if(origin.size()<3 || spacing.size()<3 || pieceExtent.size()<6) // all three are indexed below; fail loudly instead of reading past the end
+        throw std::runtime_error(std::string("VTKFile::readHeader(): malformed header in ") + this->fileName.c_str());
+    getline(file, line); // PointData
+
+    getline(file, line);
+    while(readElement(line) == "DataArray") // std::string comparison, no <cstring> needed
+    {
+        Quantity quant = Quantity();
+        quant.name = readAttribute(line, "Name");
+        quant.offset = std::stoi(readAttribute(line, "offset"));
+        this->quantities.push_back( quant );
+        getline(file, line);
+    }
+    getline(file, line); // </Piece
+    getline(file, line); // </ImageData
+    getline(file, line); // AppendedData
+
+    int offset = int(file.tellg())+sizeof(char)+4; // skip underscore and bytesPerVal
+
+    for(auto& quantity: this->quantities)
+    {
+        quantity.offset += offset; // turn the offset relative to the appended data into a position in the file
+    }
+
+    file.close();
+
+    this->deltaX = spacing[0];
+    this->deltaY = spacing[1];
+    this->deltaZ = spacing[2];
+
+    this->nx = pieceExtent[1]-pieceExtent[0]+1;
+    this->ny = pieceExtent[3]-pieceExtent[2]+1;
+    this->nz = pieceExtent[5]-pieceExtent[4]+1;
+
+    this->minX = origin[0]+this->deltaX*pieceExtent[0]; this->maxX = (this->nx-1)*this->deltaX+this->minX;
+    this->minY = origin[1]+this->deltaY*pieceExtent[2]; this->maxY = (this->ny-1)*this->deltaY+this->minY;
+    this->minZ = origin[2]+this->deltaZ*pieceExtent[4]; this->maxZ = (this->nz-1)*this->deltaZ+this->minZ;
+    // printFileInfo();
+}
+// Returns true if the first quantity of the file holds a NaN at any position listed in readIndices
+// (positions are value indices counted from the start of that quantity's data).
+bool VTKFile::markNANs(std::vector<uint> readIndices)
+{
+    if(this->quantities.empty()) return false; // no data array to inspect
+    std::ifstream buf(fileName.c_str(), std::ios::in | std::ios::binary);
+    return std::any_of(readIndices.begin(), readIndices.end(), [&](uint index){
+        double value = 0.0; // read value by value; a bulk read into a vector that was only reserve()d would leave it empty
+        buf.seekg(std::streamoff(this->quantities[0].offset) + std::streamoff(index)*std::streamoff(sizeof(double)));
+        buf.read(reinterpret_cast<char*>(&value), sizeof(double));
+        return buf && isnan(value);
+    });
+}
+// Reads getNumberOfPoints() doubles per quantity from the file into its values vector and marks the file as loaded.
+void VTKFile::loadFile()
+{
+    std::ifstream stream(this->fileName.c_str(), std::ios::in | std::ios::binary);
+    for(size_t q=0; q<this->quantities.size(); q++)
+    {
+        Quantity& current = this->quantities[q];
+        current.values.resize(getNumberOfPoints());
+        stream.seekg(current.offset);
+        char* rawBytes = reinterpret_cast<char*>(current.values.data());
+        stream.read(rawBytes, this->getNumberOfPoints()*sizeof(double));
+    }
+    stream.close();
+    this->loaded = true;
+}
+// Releases the values of every quantity and marks the file as not loaded.
+void VTKFile::unloadFile()
+{
+    for(auto& quantity : this->quantities)
+    {
+        // swapping with an empty temporary hands the old storage to the temporary, which frees it
+        std::vector<double>().swap(quantity.values);
+    }
+    this->loaded = false;
+}
+// For every quantity q: data[q*numberOfNodes + offsetWrite + writeIndices[i]] = values[readIndices[i] + offsetRead]; loads the file first if necessary.
+void VTKFile::getData(real *data, uint numberOfNodes, const std::vector<uint> &readIndices,
+                      const std::vector<uint> &writeIndices, uint offsetRead, uint offsetWrite)
+{
+    if(!this->loaded)
+        loadFile();
+
+    const size_t numberOfQuantities = this->quantities.size();
+    const size_t numberOfCopies = writeIndices.size(); // one copy per write index
+    for(size_t q=0; q<numberOfQuantities; q++)
+    {
+        const std::vector<double>& source = this->quantities[q].values;
+        real* target = data + q*numberOfNodes; // start of the block that belongs to quantity q
+        for(size_t p=0; p<numberOfCopies; p++)
+            target[offsetWrite+writeIndices[p]] = source[readIndices[p]+offsetRead];
+    }
+}
+// Prints the file name, nx/ny/nz, minX/minY/minZ, the spacing and each quantity's name and offset to stdout.
+void VTKFile::printFileInfo()
+{
+    printf("file %s with \n nx %i ny %i nz %i \n origin %f %f %f \n spacing %f %f %f \n", 
+            fileName.c_str(), nx, ny, nz, minX, minY, minZ, deltaX, deltaY, deltaZ);
+    // iterate by const reference: copying a Quantity would duplicate its whole value array
+    for(const auto& quantity: this->quantities)
+    {
+        printf("\t quantity %s offset %i \n", quantity.name.c_str(), quantity.offset);
+    }
+}
+
+// Fills files (indexed [level][id][part]) by probing file names until a part, an id and finally a level is missing.
+void VTKFileCollection::findFiles()
+{
+    bool foundLastLevel = false;
+
+    while(!foundLastLevel)
+    {
+        bool foundLastID = false;
+        std::vector<std::vector<VTKFile>> filesOnThisLevel;
+        while(!foundLastID)
+        {
+            bool foundLastPart = false;
+            std::vector<VTKFile> filesWithThisId;
+            while (!foundLastPart)
+            {
+                std::string fname = makeFileName((int)files.size(), (int)filesOnThisLevel.size(), (int)filesWithThisId.size()); // counters: level, id, part
+                std::ifstream f(fname);
+                if(f.good())
+                    filesWithThisId.emplace_back(fname); // constructing the VTKFile reads its header
+                else
+                    foundLastPart = true;    
+            }
+            if(!filesWithThisId.empty())
+            {
+                VF_LOG_INFO("VTKFileCollection found {} files with ID {} level {}", filesWithThisId.size(), filesOnThisLevel.size(), files.size() );
+                filesOnThisLevel.push_back(filesWithThisId);
+            }
+            else foundLastID = true; // an id without a single part ends the search on this level
+        }
+
+        // a level without a single id ends the whole search
+        if(!filesOnThisLevel.empty())
+            files.push_back(filesOnThisLevel);
+        else 
+            foundLastLevel = true;
+
+    }
+    // finding nothing is only logged here; no exception is thrown
+    if(files.empty())
+        VF_LOG_CRITICAL("VTKFileCollection found no files!"); 
+}
+// Copies the four plane-neighbor index lists into the given arrays, each starting at element writingOffset.
+void TransientBCInputFileReader::getNeighbors(uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM)
+{
+    std::copy(planeNeighbor0PP.begin(), planeNeighbor0PP.end(), neighbor0PP + writingOffset);
+    std::copy(planeNeighbor0PM.begin(), planeNeighbor0PM.end(), neighbor0PM + writingOffset);
+    std::copy(planeNeighbor0MP.begin(), planeNeighbor0MP.end(), neighbor0MP + writingOffset);
+    std::copy(planeNeighbor0MM.begin(), planeNeighbor0MM.end(), neighbor0MM + writingOffset);
+}
+// Copies the four interpolation weight lists into the given arrays, each starting at element writingOffset.
+void TransientBCInputFileReader::getWeights(real* _weights0PP, real* _weights0PM, real* _weights0MP, real* _weights0MM)
+{
+    std::copy(weights0PP.begin(), weights0PP.end(), _weights0PP + writingOffset);
+    std::copy(weights0PM.begin(), weights0PM.end(), _weights0PM + writingOffset);
+    std::copy(weights0MP.begin(), weights0MP.end(), _weights0MP + writingOffset);
+    std::copy(weights0MM.begin(), weights0MM.end(), _weights0MM + writingOffset);
+}
+// Sizes readIndices, writeIndices and nFile to one entry per level and, inside each level, one entry per file id.
+void VTKReader::initializeIndexVectors()
+{
+    const auto& levels = this->fileCollection->files;
+    this->readIndices.resize(levels.size());
+    this->writeIndices.resize(levels.size());
+    this->nFile.resize(levels.size());
+    for(size_t lev=0; lev<levels.size(); lev++)
+    {
+        this->readIndices[lev].resize(levels[lev].size());
+        this->writeIndices[lev].resize(levels[lev].size());
+        this->nFile[lev].resize(levels[lev].size());
+    }
+}
+// For each point (coordsY[i], coordsZ[i]) collects four neighbors from the files of readLevel with their weights and array positions; throws std::runtime_error if a point lacks a neighbor or markNANs reports a NaN.
+void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ)
+{
+    this->nPoints = (uint)coordsY.size();
+    this->initializeIndexVectors();
+    real max_diff = 1e-4; // maximum distance between point on grid and precursor plane to count as exact match
+    real eps = 1e-7; // small number to avoid division by zero
+    bool perfect_match = true;
+
+    this->weights0PP.reserve(this->nPoints);
+    this->weights0PM.reserve(this->nPoints);
+    this->weights0MP.reserve(this->nPoints);
+    this->weights0MM.reserve(this->nPoints);
+
+    this->planeNeighbor0PP.reserve(this->nPoints);
+    this->planeNeighbor0PM.reserve(this->nPoints);
+    this->planeNeighbor0MP.reserve(this->nPoints);
+    this->planeNeighbor0MM.reserve(this->nPoints);
+
+    for(uint i=0; i<nPoints; i++)
+    {
+        // each of the four lists must receive exactly one entry per point; the found flags track which ones are served
+        real posY = coordsY[i];
+        real posZ = coordsZ[i];
+        bool found0PP = false, found0PM = false, found0MP = false, found0MM = false, foundAll = false;
+
+        uint level = this->readLevel;
+
+        for(int fileId=0; fileId<(int)this->fileCollection->files[level].size(); fileId++)
+        {
+            VTKFile &file = this->fileCollection->files[level][fileId][0];
+            if(!file.inBoundingBox(posY, posZ, 0.0f)) continue;
+
+            // y in simulation is x in precursor/file, z in simulation is y in precursor/file 
+            // simulation -> file: N -> E, S -> W, T -> N, B -> S
+            int idx = file.findNeighborMMM(posY, posZ, 0.f);                            //!> index of nearest WSB neighbor on precursor file
+            
+            if(idx!=-1)
+            {
+                // Filter for exact matches. NOTE(review): this branch appends to all four lists without checking the found flags — confirm a point cannot be partially matched by an earlier file
+                if(fabs(posY-file.getX(idx)) < max_diff && fabs(posZ-file.getY(idx)) < max_diff) // fabs: an unqualified abs may resolve to the integer overload
+                {
+                    this->weights0PP.emplace_back(1e6f);
+                    this->weights0PM.emplace_back(0.f);
+                    this->weights0MP.emplace_back(0.f);
+                    this->weights0MM.emplace_back(0.f);
+                    uint writeIdx = this->getWriteIndex(level, fileId, idx);            //!> writeIdx: index on host/device array where precursor value will be written to after loading from file
+                    this->planeNeighbor0PP.push_back(writeIdx);                          //!> neighbor lists mapping where BC kernel should read from on host/device array
+                    this->planeNeighbor0PM.push_back(writeIdx);
+                    this->planeNeighbor0MP.push_back(writeIdx);
+                    this->planeNeighbor0MM.push_back(writeIdx);
+                    found0PP = true;
+                    found0PM = true;
+                    found0MM = true;
+                    found0MP = true;
+                } 
+                else
+                {
+                    perfect_match = false;
+                }
+
+                if(!found0MM)
+                {
+                    found0MM = true;
+                    real dy = file.getX(idx)-posY;
+                    real dz = file.getY(idx)-posZ;
+                    this->weights0MM.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighbor0MM.emplace_back(getWriteIndex(level, fileId, idx));
+                }
+                
+            } 
+            
+            if(!found0PP) //NT in simulation is EN in precursor
+            {
+                int index = file.findNeighborPPM(posY, posZ, 0.f);
+                if(index!=-1)
+                {
+                    found0PP = true;
+                    real dy = file.getX(index)-posY;
+                    real dz = file.getY(index)-posZ;
+                    this->weights0PP.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighbor0PP.emplace_back(getWriteIndex(level, fileId, index));
+                }
+            }
+
+            if(!found0PM) //NB in simulation is ES in precursor
+            {
+                int index = file.findNeighborPMM(posY, posZ, 0.f);
+                if(index!=-1)
+                {
+                    found0PM = true;
+                    real dy = file.getX(index)-posY;
+                    real dz = file.getY(index)-posZ;
+                    this->weights0PM.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighbor0PM.emplace_back(getWriteIndex(level, fileId, index)); // must go to the 0PM list, parallel to weights0PM
+                }
+            }
+
+            if(!found0MP) //ST in simulation is WN in precursor
+            {
+                int index = file.findNeighborMPM(posY, posZ, 0.f);
+                if(index!=-1)
+                {
+                    found0MP = true;
+                    real dy = file.getX(index)-posY;
+                    real dz = file.getY(index)-posZ;
+                    this->weights0MP.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighbor0MP.emplace_back(getWriteIndex(level, fileId, index));
+                }
+            }
+
+            foundAll = found0PP && found0PM && found0MP && found0MM;
+
+            if(foundAll) break;
+        }
+
+        if(!foundAll)
+        {
+            VF_LOG_CRITICAL("Found no matching precursor neighbors for grid point at y={}, z={} \n", posY, posZ);
+            throw std::runtime_error("VTKReader::fillArrays(): Did not find neighbors in the FileCollection for all points");
+        }
+    }
+
+    if(perfect_match)
+        printf("Precursor was a perfect match \n");
+
+    // check the first file of every level/id for NaNs at the positions that will be read
+    for(size_t level=0; level<this->fileCollection->files.size(); level++){
+        for(size_t id=0; id<this->fileCollection->files[level].size(); id++){
+            if(this->fileCollection->files[level][id][0].markNANs(this->readIndices[level][id]))
+                throw std::runtime_error("Found a NAN in the precursor where a velocity is needed");
+    }}
+}
+// Returns the index on the host/device array that receives the file value with index linearIndex,
+// registering a new read/write index pair the first time a file index is requested.
+uint VTKReader::getWriteIndex(int level, int id, int linearIndex)
+{
+    auto& reads  = this->readIndices[level][id];
+    auto& writes = this->writeIndices[level][id];
+    // look the file index up among the indices already scheduled for reading; its write index is stored at the same position
+    auto it = std::find(reads.begin(), reads.end(), (uint)linearIndex);
+    if(it != reads.end()) return writes[it-reads.begin()];
+    writes.push_back(this->nPointsRead);     //!> index on host/device array where value from file will be written to
+    reads.push_back(linearIndex);            //!> index in file that will be read from 
+    return this->nPointsRead++;              // the write index just registered
+}
+
+// Writes the values for the given time into data for every file id of readLevel, moving on to the next file (and preloading the one after it) once time leaves the current file's z-range.
+void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
+{
+    // for(size_t level=0; level<this->fileCollection->files.size(); level++)
+    // {
+        uint level = this->readLevel;
+        for(size_t id=0; id<this->fileCollection->files[level].size(); id++)
+        {
+            size_t numberOfFiles = this->nFile[level][id];
+            // despite its name, numberOfFiles is the index of the file currently used for this id
+            // at most one file is skipped per call
+            if(!this->fileCollection->files[level][id][numberOfFiles].inZBounds(time))
+            {
+                numberOfFiles++;
+
+                printf("switching to precursor file no. %zu\n", numberOfFiles);
+                if(numberOfFiles == this->fileCollection->files[level][id].size())
+                    throw std::runtime_error("Not enough Precursor Files to read");
+                if(read.valid()) read.wait(); // finish a pending preload before touching files: it may target the file that becomes current now; valid() guards a future that was never started
+                this->fileCollection->files[level][id][numberOfFiles-1].unloadFile();
+                if(numberOfFiles+1<this->fileCollection->files[level][id].size())
+                {
+                    VTKFile* nextFile = &this->fileCollection->files[level][id][numberOfFiles+1];
+                    if(! nextFile->isLoaded())
+                    {
+                        // preload the file after the new current one in the background
+                        read = std::async(std::launch::async, [](VTKFile* file){ file->loadFile(); }, &this->fileCollection->files[level][id][numberOfFiles+1]);
+                    }
+                }
+            }
+        
+
+            VTKFile* file = &this->fileCollection->files[level][id][numberOfFiles];
+
+            int off = file->getClosestIdxZ(time)*file->getNumberOfPointsInXYPlane();
+            file->getData(data, numberOfNodes, this->readIndices[level][id], this->writeIndices[level][id], off, this->writingOffset);
+            this->nFile[level][id] = numberOfFiles;
+        }
+    // }
+}
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
new file mode 100644
index 0000000000000000000000000000000000000000..1663a3ff37ba1bb062647847462d4e364baed93b
--- /dev/null
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
@@ -0,0 +1,201 @@
+#ifndef TRANSIENTBCSETTER_H_
+#define TRANSIENTBCSETTER_H_
+
+#include "Core/DataTypes.h"
+#include <Core/StringUtilities/StringUtil.h>
+#include "PointerDefinitions.h"
+
+#include <string>
+#include <vector>
+#include <math.h>
+#include <sstream>
+#include <future>
+class Grid;
+namespace gg
+{
+    class BoundaryCondition;
+}
+
+
+enum class FileType // file formats a FileCollection can report; VTK is the only one defined here
+{
+    VTK // returned by VTKFileCollection::getFileType()
+};
+
+struct Quantity // one named data array of a VTKFile (VTKFile keeps a std::vector<Quantity>)
+{
+    std::string name;           // name of the quantity
+    int offset;                 // NOTE(review): presumably the position of this array's data inside the file -- confirm in VTKFile::readHeader()
+    std::vector<double> values; // values of the quantity; NOTE(review): presumably filled by VTKFile::loadFile() and released by unloadFile() -- confirm
+};
+
+//! One file of a precursor series. The constructor calls readHeader() and marks the file as not loaded;
+//! the data itself is handled by the separate loadFile() / unloadFile() calls.
+//! nx, ny, nz are point counts (getNumberOfPoints() == nx*ny*nz), so valid per-axis indices are 0..n-1.
+class VTKFile
+{
+public:
+    explicit VTKFile(std::string _fileName) : fileName(_fileName)
+    {
+        readHeader();
+        this->loaded = false;
+    }
+    void getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite);
+    bool markNANs(std::vector<uint> readIndices);
+    bool inBoundingBox(real posX, real posY, real posZ){return  inXBounds(posX) && inYBounds(posY) && inZBounds(posZ); };
+    bool inXBounds(real posX){ return posX<=maxX && posX>=minX; };
+    bool inYBounds(real posY){ return posY<=maxY && posY>=minY; };
+    bool inZBounds(real posZ){ return posZ<=maxZ && posZ>=minZ; };
+    // findNeighborXYZ: per axis, M uses the truncated index of the position and P that index + 1; returns the linear index, or -1 if it is outside [0, nx*ny*nz).
+    int findNeighborMMM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)  , getIdx0M0(posY)  , getIdx00M(posZ)  ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int findNeighborMMP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)  , getIdx0M0(posY)  , getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int findNeighborMPM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)  , getIdx0M0(posY)+1, getIdx00M(posZ)  ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int findNeighborMPP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)  , getIdx0M0(posY)+1, getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int findNeighborPMM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY)  , getIdx00M(posZ)  ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int findNeighborPMP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY)  , getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int findNeighborPPM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY)+1, getIdx00M(posZ)  ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int findNeighborPPP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY)+1, getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; };
+    int getIdxX(int linearIdx){ return linearIdx%nx;};
+    int getIdxY(int linearIdx){ return (linearIdx/nx)%ny;};
+    int getIdxZ(int linearIdx){ return linearIdx/(nx*ny); };
+    real getX(int linearIdx){ return getIdxX(linearIdx)*deltaX+minX; };
+    real getY(int linearIdx){ return getIdxY(linearIdx)*deltaY+minY; };
+    real getZ(int linearIdx){ return getIdxZ(linearIdx)*deltaZ+minZ; };
+    // Per-axis index obtained by truncating (pos - min) / delta to int; no range check is done here.
+    int getIdxM00(real posX){ return (posX-minX)/deltaX; };
+    int getIdx0M0(real posY){ return (posY-minY)/deltaY; };
+    int getIdx00M(real posZ){ return (posZ-minZ)/deltaZ; };
+    // Nearest per-axis index, clamped to the valid range [0, n-1]. Clamping to n would be one past the last point:
+    // callers multiply the result by a plane size to get a read offset, which must stay inside the data.
+    int getClosestIdxX(real posX){ int x = round((posX-minX)/deltaX); return x<0 ? 0 : (x<nx ? x : (nx>0 ? nx-1 : 0));};
+    int getClosestIdxY(real posY){ int y = round((posY-minY)/deltaY); return y<0 ? 0 : (y<ny ? y : (ny>0 ? ny-1 : 0));};
+    int getClosestIdxZ(real posZ){ int z = round((posZ-minZ)/deltaZ); return z<0 ? 0 : (z<nz ? z : (nz>0 ? nz-1 : 0));};
+    int getLinearIndex(int idxX, int idxY, int idxZ){ return idxX + nx*(idxY+ny*idxZ); };
+    int getNumberOfPointsInXYPlane(){ return nx*ny; }
+    int getNumberOfPointsInYZPlane(){ return ny*nz; }
+    int getNumberOfPointsInXZPlane(){ return nx*nz; }
+    int getNumberOfPoints(){ return nx*ny*nz; }
+    size_t getNumberOfQuantities(){ return quantities.size(); }
+    void loadFile();
+    void unloadFile();
+    bool isLoaded(){return loaded;};
+
+private:
+    void readHeader();
+    void printFileInfo();
+    // NOTE(review): the geometry members below are presumably filled by readHeader() -- confirm in the .cpp.
+    std::string fileName;
+    real minX, maxX, minY, maxY, minZ, maxZ; // bounds tested by inXBounds() / inYBounds() / inZBounds()
+    real deltaX, deltaY, deltaZ;             // spacing between neighbouring points along each axis
+    int nx, ny, nz;                          // number of points along each axis
+    std::vector<Quantity> quantities;
+    bool loaded;                             // value returned by isLoaded()
+};
+
+class FileCollection // abstract interface for a set of input files that share a file-name prefix
+{
+public:
+    FileCollection(std::string _prefix): 
+    prefix(_prefix){};
+    // virtual destructor: collections are handed around as SPtr<FileCollection> (see createFileCollection)
+    virtual ~FileCollection() = default;
+    // number of quantities provided by the files of this collection
+    virtual size_t getNumberOfQuantities() = 0;
+    // format of the files in this collection
+    virtual FileType getFileType() = 0;
+
+protected:
+    std::string prefix; // copy of the constructor argument; VTKFileCollection::makeFileName() starts every file name with it
+};
+
+
+class VTKFileCollection : public FileCollection // FileCollection of VTKFile objects; findFiles() runs once in the constructor
+{
+public:
+    VTKFileCollection(std::string _prefix): 
+    FileCollection(_prefix)
+    {
+        findFiles();
+    };
+    // getNumberOfQuantities() asks files[0][0][0] only. NOTE(review): no bounds check -- presumably findFiles() guarantees at least one file; confirm.
+    FileType getFileType() override{ return FileType::VTK; };
+    size_t getNumberOfQuantities() override{ return files[0][0][0].getNumberOfQuantities(); }
+    // Private helpers: findFiles() is defined in the .cpp; makeFileName() builds
+    // "<prefix>_lev_<level>_ID_<id>_File_<part>.bin.<suffix>".
+private:
+    void findFiles();
+    std::string makeFileName(int level, int id, int part)
+    { 
+        return prefix + "_lev_" + StringUtil::toString<int>(level)
+                    + "_ID_" +    StringUtil::toString<int>(id)
+                    + "_File_" +  StringUtil::toString<int>(part) 
+                    + ".bin." + suffix;
+    };
+    // files is accessed as files[level][id][file number] by VTKReader.
+    // NOTE(review): presumably filled by findFiles() using makeFileName(level, id, part) -- confirm.
+public:
+    static const inline std::string suffix = "vti";
+    std::vector<std::vector<std::vector<VTKFile>>> files;
+};
+
+
+class TransientBCInputFileReader // abstract reader base: getNextData() and fillArrays() are pure virtual
+{
+public:
+    TransientBCInputFileReader()
+    { 
+        this->nPoints = 0; 
+        this->nPointsRead = 0;
+        this->writingOffset = 0;        
+    };
+    virtual ~TransientBCInputFileReader() = default;
+    // Interface implemented by concrete readers (VTKReader in this header).
+    virtual void getNextData(real* data, uint numberOfNodes, real time)=0;
+    virtual void fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ)=0;
+    uint getNPoints(){return nPoints; };
+    uint getNPointsRead(){return nPointsRead; };
+    size_t getNumberOfQuantities(){ return nQuantities; };
+    void setWritingOffset(uint offset){ this->writingOffset = offset; }
+    void getNeighbors(uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM);
+    void getWeights(real* _weights0PP, real* _weights0PM, real* _weights0MP, real* _weights0MM);
+    // NOTE(review): presumably per-point neighbor indices and interpolation weights, filled by fillArrays() and copied out by getNeighbors() / getWeights() -- confirm in the .cpp.
+public:
+    std::vector<uint> planeNeighbor0PP,  planeNeighbor0PM, planeNeighbor0MP, planeNeighbor0MM;
+    std::vector<real> weights0PP, weights0PM, weights0MP,  weights0MM;
+    // writingOffset is what VTKReader passes to VTKFile::getData() as offsetWrite; nQuantities is set by the derived reader.
+protected:
+    uint nPoints, nPointsRead, writingOffset;
+    uint nReads=0;
+    size_t nQuantities=0;
+};
+
+
+class VTKReader : public TransientBCInputFileReader // reader for a VTKFileCollection that works on one level of it (readLevel)
+{
+public:
+    VTKReader(SPtr<VTKFileCollection> _fileCollection, uint _readLevel):
+    fileCollection(_fileCollection), 
+    readLevel(_readLevel)
+    {
+        this->nQuantities = fileCollection->getNumberOfQuantities();
+        read = std::async([](){}); // give `read` a valid shared state so that the first read.wait() is well defined
+    };
+    void getNextData(real* data, uint numberOfNodes, real time) override;
+    void fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ) override;
+private:  
+    uint getWriteIndex(int level, int id, int linearIdx);
+    void initializeIndexVectors();
+    // readIndices / writeIndices are indexed [level][id]; nFile[level][id] is the index of the file currently read from.
+private:
+    std::vector<std::vector<std::vector<uint>>> readIndices, writeIndices;
+    std::vector<std::vector<size_t>> nFile;
+    SPtr<VTKFileCollection> fileCollection;
+    uint readLevel;
+    std::future<void> read; // pending asynchronous VTKFile::loadFile() of an upcoming file
+};
+
+
+SPtr<FileCollection> createFileCollection(std::string prefix, FileType type);
+SPtr<TransientBCInputFileReader> createReaderForCollection(SPtr<FileCollection> fileCollection, uint readLevel);
+
+#endif //TRANSIENTBCSETTER_H_
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp
index 5102f60fc295aadf4323a4b332bf3dd8f7f21dbf..b0fb2604946b83ead45c30adabbcfe8dc26fa656 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp
@@ -36,12 +36,12 @@
 
 #include "grid/BoundaryConditions/Side.h"
 #include "grid/Grid.h"
+#include "GridGenerator/TransientBCSetter/TransientBCSetter.h"
 
 bool gg::BoundaryCondition::isSide( SideType side ) const
 {
     return this->side->whoAmI() == side;
 }
-
 //////////////////////////////////////////////////////////////////////////
 
 void VelocityBoundaryCondition::setVelocityProfile(
@@ -124,5 +124,4 @@ void StressBoundaryCondition::fillSamplingIndices(std::vector<SPtr<Grid> > grid,
         this->velocitySamplingIndices.push_back( grid[level]->transCoordToIndex(x_sampling, y_sampling, z_sampling) );
     }
     
-}
-
+}
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
index 4a3990d9f815042297be76ae83a61268c8ad6815..22342aec9839afad9bb37b1b11812f6d1750ed7b 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
@@ -45,6 +45,8 @@ class Grid;
 class Side;
 enum class SideType;
 
+class TransientBCInputFileReader;
+
 namespace gg
 {
 class BoundaryCondition
@@ -63,6 +65,8 @@ public:
     bool isSide(SideType side) const;
 
     real getQ(uint index, uint dir) { return this->qs[index][dir]; }
+
+    void getCoords( SPtr<Grid> grid, std::vector<real>& x, std::vector<real>& y, std::vector<real>& z);
 };
 
 }
@@ -246,6 +250,7 @@ public:
     real getVy(uint index) { return this->vyList[index]; }
     real getVz(uint index) { return this->vzList[index]; }
 
+
     void setVelocityProfile( SPtr<Grid> grid, std::function<void(real,real,real,real&,real&,real&)> velocityProfile );
 };
 
@@ -329,5 +334,32 @@ public:
     real getNormalz(uint index) { return this->normalZList[index]; }
 };
 
+//! Boundary condition that carries a TransientBCInputFileReader, a read interval and three velocity
+//! components. It reports the type vf::gpu::BC_VELOCITY. Instances are created through make().
+class PrecursorBoundaryCondition : public gg::BoundaryCondition
+{
+public:
+    //! Factory. NOTE(review): timeStepsBetweenReads is an int here but is stored as uint, so a negative argument wraps around.
+    static SPtr<PrecursorBoundaryCondition> make(SPtr<TransientBCInputFileReader> reader, int timeStepsBetweenReads, real velocityX, real velocityY, real velocityZ)
+    {
+        return SPtr<PrecursorBoundaryCondition>(new PrecursorBoundaryCondition(reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ));
+    }
 
+    SPtr<TransientBCInputFileReader> getReader(){ return reader; }
+    real getVelocityX() { return velocityX; }
+    real getVelocityY() { return velocityY; }
+    real getVelocityZ() { return velocityZ; }
+
+private:
+    // Initializers are listed in member declaration order, which is the order they run in (avoids -Wreorder).
+    PrecursorBoundaryCondition(SPtr<TransientBCInputFileReader> _reader, uint _timeStepsBetweenReads, real vx, real vy, real vz) : timeStepsBetweenReads(_timeStepsBetweenReads), velocityX(vx), velocityY(vy), velocityZ(vz), reader(_reader) { }
+    char getType() const override { return vf::gpu::BC_VELOCITY; }
+public:
+    uint timeStepsBetweenReads; //!< read data every nth timestep
+private:
+    real velocityX = 0.0;
+    real velocityY = 0.0;
+    real velocityZ = 0.0;
+    SPtr<TransientBCInputFileReader> reader;
+};
 #endif
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
index 6c7bf8ca1853826d83fb6a713ffe03716bd2cf9a..ba4eea50ffb6bc136528db31207274d626fe9b15 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -37,9 +37,24 @@
 #include "grid/NodeValues.h"
 
 #include "utilities/math/Math.h"
+#include <array>
+#include <cstddef>
+#include <vector>
 
 using namespace gg;
 
+std::array<real, 3> Side::getNormal() const
+{
+    // Axis-aligned normal: getDirection() on the axis named by getCoordinate(), zero on the other two.
+    std::array<real, 3> normal{}; // value-initialized: an unmatched coordinate yields {0, 0, 0}, not indeterminate values
+    const int axis = this->getCoordinate();
+    const real direction = static_cast<real>(this->getDirection());
+    if (axis == X_INDEX) normal[0] = direction;
+    else if (axis == Y_INDEX) normal[1] = direction;
+    else if (axis == Z_INDEX) normal[2] = direction;
+    return normal;
+}
+
 void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, std::string coord, real constant,
                       real startInner, real endInner, real startOuter, real endOuter)
 {
@@ -49,11 +64,17 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition
         {
             const uint index = getIndex(grid, coord, constant, v1, v2);
 
-            if ((index != INVALID_INDEX) && (  grid->getFieldEntry(index) == vf::gpu::FLUID
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF ))
+            if(index == INVALID_INDEX)
+                continue;
+
+            if (   grid->getFieldEntry(index) == vf::gpu::FLUID
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFC
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFF
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCC
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
+                                            // Overlap of BCs on edge nodes
+                                            || grid->nodeHasBC(index) )
             {
                 grid->setFieldEntry(index, boundaryCondition->getType());
                 boundaryCondition->indices.push_back(index);
@@ -64,9 +85,12 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition
 
                 boundaryCondition->patches.push_back(0);
             }
-
         }
     }
+
+    const auto currentBCSide = this->whoAmI();
+    if(currentBCSide != SideType::GEOMETRY)
+        grid->addBCalreadySet(currentBCSide);
 }
 
 void Side::setPressureNeighborIndices(SPtr<BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index)
@@ -119,50 +143,111 @@ void Side::setStressSamplingIndices(SPtr<BoundaryCondition> boundaryCondition, S
 
 void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uint index)
 {
-
     std::vector<real> qNode(grid->getEndDirection() + 1);
 
-    for (int dir = 0; dir <= grid->getEndDirection(); dir++)
-    {
-        real x,y,z;
-        grid->transIndexToCoords( index, x, y, z );
+    for (int dir = 0; dir <= grid->getEndDirection(); dir++) {
+        real x, y, z;
+        grid->transIndexToCoords(index, x, y, z);
 
-        real coords[3] = {x,y,z};
+        std::array<real, 3> coords = { x, y, z };
+        std::array<real, 3> neighborCoords = getNeighborCoordinates(grid.get(), coords, (size_t)dir);
 
-        real neighborX = x + grid->getDirection()[dir * DIMENSION + 0] * grid->getDelta();
-        real neighborY = y + grid->getDirection()[dir * DIMENSION + 1] * grid->getDelta();
-        real neighborZ = z + grid->getDirection()[dir * DIMENSION + 2] * grid->getDelta();
+        correctNeighborForPeriodicBoundaries(grid.get(), coords, neighborCoords);
 
-        // correct neighbor coordinates in case of periodic boundaries
-        if( grid->getPeriodicityX() && grid->getFieldEntry( grid->transCoordToIndex( neighborX, y, z ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY )
-        {
-            if( neighborX > x ) neighborX = grid->getFirstFluidNode( coords, 0, grid->getStartX() );
-            else                neighborX = grid->getLastFluidNode ( coords, 0, grid->getEndX() );
-        }
+        const uint neighborIndex = grid->transCoordToIndex(neighborCoords[0], neighborCoords[1], neighborCoords[2]);
 
-        if( grid->getPeriodicityY() && grid->getFieldEntry( grid->transCoordToIndex( x, neighborY, z ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY )
-        {
-            if( neighborY > y ) neighborY = grid->getFirstFluidNode( coords, 1, grid->getStartY() );
-            else                neighborY = grid->getLastFluidNode ( coords, 1, grid->getEndY() );
+        //! Only setting q's that partially point in the Side-normal direction
+        const bool alignedWithNormal = this->isAlignedWithMyNormal(grid.get(), dir);
+        if (grid->isStopperForBC(neighborIndex) && alignedWithNormal) {
+            qNode[dir] = 0.5;
+        } else {
+            qNode[dir] = -1.0;
         }
 
-        if( grid->getPeriodicityZ() && grid->getFieldEntry( grid->transCoordToIndex( x, y, neighborZ ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY )
-        {
-            if( neighborZ > z ) neighborZ = grid->getFirstFluidNode( coords, 2, grid->getStartZ() );
-            else                neighborZ = grid->getLastFluidNode ( coords, 2, grid->getEndZ() );
+        // reset diagonals in case they were set by another bc
+        resetDiagonalsInCaseOfOtherBC(grid.get(), qNode, dir, coords);
+    }
+
+    boundaryCondition->qs.push_back(qNode);
+}
+
+std::array<real, 3> Side::getNeighborCoordinates(Grid *grid, const std::array<real, 3> &coordinates, size_t direction) const
+{
+    const auto base = direction * DIMENSION; // first component of lattice direction `direction`; the neighbor lies one getDelta() along it
+    return { coordinates[0] + grid->getDirection()[base + 0] * grid->getDelta(), coordinates[1] + grid->getDirection()[base + 1] * grid->getDelta(),
+             coordinates[2] + grid->getDirection()[base + 2] * grid->getDelta() };
+}
+
+bool Side::neighborNormalToSideIsAStopper(Grid *grid, const std::array<real, 3> &coordinates, SideType side) const
+{
+    // Look at the node next to `coordinates` in the lattice direction mapped to `side` (at() throws std::out_of_range for an unmapped side).
+    const std::array<real, 3> shifted = getNeighborCoordinates(grid, coordinates, sideToD3Q27.at(side));
+    return grid->isStopperForBC(grid->transCoordToIndex(shifted[0], shifted[1], shifted[2]));
+}
+
+void Side::resetDiagonalsInCaseOfOtherBC(Grid *grid, std::vector<real> &qNode, int dir,
+                                         const std::array<real, 3> &coordinates) const
+{
+    // Resets qNode[dir] from 0.5 back to -1 when the link belongs to a side whose boundary condition was set earlier.
+    // For every side in grid->getBCAlreadySet() the q is reset if both of the following hold:
+    // - the node's neighbor in that side's axis direction is a BC stopper, i.e. the node is influenced by the other bc, and
+    // - direction dir has a positive component along that side's normal.
+    // Only a q of exactly 0.5 is considered; any other value is left untouched.
+    if (qNode[dir] == 0.5 && grid->getBCAlreadySet().size() > 0) {
+        for (int i = 0; i < (int)grid->getBCAlreadySet().size(); i++) {
+            SideType otherDir = grid->getBCAlreadySet()[i];
+
+            // skip sides this node does not touch: only nodes on edges and corners are reset, not nodes on faces
+            if (!neighborNormalToSideIsAStopper(grid, coordinates, otherDir))
+                continue;
+            // normals.at() throws std::out_of_range for a side without an entry in the map
+            const auto otherNormal = normals.at(otherDir);
+            if (isAlignedWithNormal(grid, dir, otherNormal)) {
+                qNode[dir] = -1.0;
+            }
         }
+    }
+}
 
-        uint neighborIndex = grid->transCoordToIndex( neighborX, neighborY, neighborZ );
-        if( grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ||
-            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID ||
-            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID )
-            qNode[dir] = 0.5;
+bool Side::isAlignedWithMyNormal(const Grid *grid, int dir) const
+{
+    // Same test as isAlignedWithNormal(), evaluated against this side's own normal.
+    return isAlignedWithNormal(grid, dir, this->getNormal());
+}
+
+bool Side::isAlignedWithNormal(const Grid *grid, int dir, const std::array<real, 3> &normal) const
+{
+    const auto base = dir * DIMENSION; // first component of lattice direction dir; aligned means dot(direction, normal) > 0
+    return 0 < normal[0] * grid->getDirection()[base + 0] + normal[1] * grid->getDirection()[base + 1] +
+               normal[2] * grid->getDirection()[base + 2];
+}
+
+void Side::correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighborCoords) const
+{
+    // correct neighbor coordinates in case of periodic boundaries
+    if (grid->getPeriodicityX() &&
+        grid->getFieldEntry(grid->transCoordToIndex(neighborCoords[0], coords[1], coords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) {
+        if (neighborCoords[0] > coords[0])
+            neighborCoords[0] = grid->getFirstFluidNode(coords.data(), 0, grid->getStartX());
         else
-            qNode[dir] = -1.0;
+            neighborCoords[0] = grid->getLastFluidNode(coords.data(), 0, grid->getEndX());
+    }
 
+    if (grid->getPeriodicityY() &&
+        grid->getFieldEntry(grid->transCoordToIndex(coords[0], neighborCoords[1], coords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) {
+        if (neighborCoords[1] > coords[1])
+            neighborCoords[1] = grid->getFirstFluidNode(coords.data(), 1, grid->getStartY());
+        else
+            neighborCoords[1] = grid->getLastFluidNode(coords.data(), 1, grid->getEndY());
     }
 
-    boundaryCondition->qs.push_back(qNode);
+    if (grid->getPeriodicityZ() &&
+        grid->getFieldEntry(grid->transCoordToIndex(coords[0], coords[1], neighborCoords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) {
+        if (neighborCoords[2] > coords[2])
+            neighborCoords[2] = grid->getFirstFluidNode(coords.data(), 2, grid->getStartZ());
+        else
+            neighborCoords[2] = grid->getLastFluidNode(coords.data(), 2, grid->getEndZ());
+    }
 }
 
 uint Side::getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2)
@@ -177,7 +262,7 @@ uint Side::getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1,
 }
 
 
-void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void Geometry::addIndices(const std::vector<SPtr<Grid>> &grids, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     auto geometryBoundaryCondition = std::dynamic_pointer_cast<GeometryBoundaryCondition>(boundaryCondition);
 
@@ -190,7 +275,7 @@ void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<Bound
 
         for (int dir = 0; dir <= grids[level]->getEndDirection(); dir++)
         {
-			const real q = grids[level]->getQValue(index, dir);
+            const real q = grids[level]->getQValue(index, dir);
 
             qNode[dir] = q;
 
@@ -218,7 +303,7 @@ void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<Bound
 
 
 
-void MX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void MX::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartY();
     real endInner = grid[level]->getEndY();
@@ -234,7 +319,7 @@ void MX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
 
 }
 
-void PX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void PX::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartY();
     real endInner = grid[level]->getEndY();
@@ -249,7 +334,7 @@ void PX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     Side::addIndices(grid[level], boundaryCondition, "x", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
 
-void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void MY::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
@@ -265,7 +350,7 @@ void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
 }
 
 
-void PY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void PY::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
@@ -281,7 +366,7 @@ void PY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
 }
 
 
-void MZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void MZ::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
@@ -296,7 +381,7 @@ void MZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     Side::addIndices(grid[level], boundaryCondition, "z", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
 
-void PZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void PZ::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
@@ -307,6 +392,6 @@ void PZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     real coordinateNormal = grid[level]->getEndZ() - grid[level]->getDelta();
 
     if( coordinateNormal < grid[0]->getEndZ() - grid[0]->getDelta() ) return;
-    
+
     Side::addIndices(grid[level], boundaryCondition, "z", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
index 6df6bfccc9a39b80de3ac43d057a03945d035b34..624b3722a1c909ba26063b49565779b924d34adc 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
@@ -33,10 +33,14 @@
 #ifndef SIDE_H
 #define SIDE_H
 
+#include <cstddef>
 #include <string>
 #include <vector>
+#include <map>
+#include <array>
 
 #include "gpu/GridGenerator/global.h"
+#include "lbm/constants/D3Q27.h"
 
 #define X_INDEX 0
 #define Y_INDEX 1
@@ -59,37 +63,59 @@ enum class SideType
     MX, PX, MY, PY, MZ, PZ, GEOMETRY
 };
 
-
-
 class Side
 {
 public:
     virtual ~Side() = default;
-    virtual void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) = 0;
+    virtual void addIndices(const std::vector<SPtr<Grid>> &grid, uint level,
+                            SPtr<gg::BoundaryCondition> boundaryCondition) = 0;
 
     virtual int getCoordinate() const = 0;
     virtual int getDirection() const = 0;
 
     virtual SideType whoAmI() const = 0;
 
+    std::array<real, 3> getNormal() const;
+
 protected:
-    static void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant,
+    void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant,
                            real startInner, real endInner, real startOuter, real endOuter);
 
     static void setPressureNeighborIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index);
 
     static void setStressSamplingIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index);
 
-    static void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index);
+    void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index);
+
+    virtual void correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighbors) const;
+
+    virtual bool isAlignedWithMyNormal(const Grid *grid, int dir) const;
+    bool isAlignedWithNormal(const Grid *grid, int dir, const std::array<real, 3>& normal) const;
 
 private:
     static uint getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2);
+    void resetDiagonalsInCaseOfOtherBC(Grid *grid, std::vector<real>& qNode, int dir, const std::array<real, 3> &coordinates) const;
+    std::array<real, 3> getNeighborCoordinates(Grid *grid, const std::array<real, 3> &coordinates,
+                                               size_t direction) const;
+    bool neighborNormalToSideIsAStopper(Grid *grid, const std::array<real, 3> &coordinates, SideType side) const;
+
+protected:
+    const std::map<SideType, const std::array<real, 3>> normals = {
+        { SideType::MX, { NEGATIVE_DIR, 0.0, 0.0 } }, { SideType::PX, { POSITIVE_DIR, 0.0, 0.0 } },
+        { SideType::MY, { 0.0, NEGATIVE_DIR, 0.0 } }, { SideType::PY, { 0.0, POSITIVE_DIR, 0.0 } },
+        { SideType::MZ, { 0.0, 0.0, NEGATIVE_DIR } }, { SideType::PZ, { 0.0, 0.0, POSITIVE_DIR } }
+    };
+    const std::map<SideType, size_t> sideToD3Q27 = {
+        { SideType::MX, vf::lbm::dir::DIR_M00 }, { SideType::PX, vf::lbm::dir::DIR_P00 },
+        { SideType::MY, vf::lbm::dir::DIR_0M0 }, { SideType::PY, vf::lbm::dir::DIR_0P0 },
+        { SideType::MZ, vf::lbm::dir::DIR_00M }, { SideType::PZ, vf::lbm::dir::DIR_00P }
+    };
 };
 
 class Geometry : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -110,7 +136,7 @@ public:
 class MX : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -131,7 +157,7 @@ public:
 class PX : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -153,7 +179,7 @@ public:
 class MY : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -174,7 +200,7 @@ public:
 class PY : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -196,7 +222,7 @@ public:
 class MZ : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -217,7 +243,7 @@ public:
 class PZ : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..36a286a8766db4af7e109eb3f8d47add401779f9
--- /dev/null
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp
@@ -0,0 +1,873 @@
+#include "Side.h"
+#include "PointerDefinitions.h"
+#include "gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+#include "grid/GridImp.h"
+#include "grid/NodeValues.h"
+#include "lbm/constants/D3Q27.h"
+#include "gmock/gmock.h"
+#include <algorithm>
+#include <gtest/gtest.h>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+using namespace vf::gpu;
+using namespace vf::lbm::dir;
+
+// Concrete Side for the tests: setQs() is callable and the side is chosen through public fields.
+class SideTestSpecificSubclass : public Side
+{
+public:
+    SideType mySide = SideType::PX;
+    int coordinateDirection = X_INDEX;
+    int sideDirection = POSITIVE_DIR;
+
+    // Declared in the public section; simply forwards to Side::setQs.
+    void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index)
+    {
+        Side::setQs(grid, boundaryCondition, index);
+    }
+
+private:
+    SideType whoAmI() const override
+    {
+        return mySide;
+    }
+
+    int getCoordinate() const override
+    {
+        return coordinateDirection;
+    }
+
+    int getDirection() const override
+    {
+        return sideDirection;
+    }
+
+    // No-op: none of the tests in this file adds indices through this class.
+    void addIndices(const std::vector<SPtr<Grid>> & /*grid*/, uint /*level*/, SPtr<gg::BoundaryCondition> /*boundaryCondition*/) override {}
+
+    // No-op: coords and neighbors are left unchanged.
+    void correctNeighborForPeriodicBoundaries(const Grid * /*grid*/, std::array<real, 3> & /*coords*/, std::array<real, 3> & /*neighbors*/) const override {}
+};
+
+// GridImp stub with canned answers: every index maps to the origin, every coordinate to index 0,
+// every field entry is STOPPER_OUT_OF_GRID_BOUNDARY; endDirection is chosen by the test.
+class GridDouble : public GridImp
+{
+public:
+    // Returned by getEndDirection(); -1 until a test or fixture assigns it.
+    int endDirection = -1;
+
+    GridDouble()
+    {
+        distribution = DistributionHelper::getDistribution27();
+    }
+
+    int getEndDirection() const override
+    {
+        return endDirection;
+    }
+
+    real getDelta() const override
+    {
+        return 1.0;
+    }
+
+    char getFieldEntry(uint /*matrixIndex*/) const override
+    {
+        return STOPPER_OUT_OF_GRID_BOUNDARY;
+    }
+
+    uint transCoordToIndex(const real & /*x*/, const real & /*y*/, const real & /*z*/) const override
+    {
+        return 0;
+    }
+
+    void transIndexToCoords(uint /*index*/, real &x, real &y, real &z) const override
+    {
+        x = y = z = 0;
+    }
+};
+
+// BoundaryCondition spy: exposes the stored qs for inspection and allows clearing them.
+class BoundaryConditionSpy : public gg::BoundaryCondition
+{
+public:
+    // Empties the stored q vectors.
+    void resetQVector() { qs.clear(); }
+
+    // Read-only access to the stored q vectors.
+    const std::vector<std::vector<real>> &getQs() { return qs; }
+
+    // Type tag of this spy: the character 't'.
+    char getType() const override
+    {
+        return 't';
+    }
+};
+
+// Fixture shared by all tests below: a side, a grid double and a BC spy.
+// endDirection is 26 by default; the setQs2D tests lower it to 10.
+class SideTestBC : public testing::Test
+{
+protected:
+    void SetUp() override { grid->endDirection = 26; }
+
+    SideTestSpecificSubclass side;
+    SPtr<GridDouble> grid{ std::make_shared<GridDouble>() };
+    SPtr<BoundaryConditionSpy> bc{ std::make_shared<BoundaryConditionSpy>() };
+    uint index{ 0 };
+
+    // Not referenced by any test in this file.
+    std::vector<real> noBC;
+};
+
+// 2D case (endDirection 10): PX alone is expected to set P00, PP0 and PM0 to 0.5.
+TEST_F(SideTestBC, setQs2D_whenSettingPX_setAllQsNormalToBC)
+{
+    std::vector<real> wantedQs(11, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PP0] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+
+    grid->endDirection = 10;
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// 2D case with PY already set: PP0 is expected to stay -1; only P00 and PM0 become 0.5.
+TEST_F(SideTestBC, setQs2D_givenPYhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(11, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+
+    grid->endDirection = 10;
+    grid->addBCalreadySet(SideType::PY);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// Setting PX is expected to give q = 0.5 for all nine DIR_P?? entries, and an MX entry in the
+// grid's already-set list must not change that result.
+TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetPX_setAllQsNormalToPX)
+{
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+
+    std::vector<real> expectedQs(27, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PP0] = 0.5;
+    expectedQs[DIR_PM0] = 0.5;
+    expectedQs[DIR_P0P] = 0.5;
+    expectedQs[DIR_P0M] = 0.5;
+    expectedQs[DIR_PPP] = 0.5;
+    expectedQs[DIR_PMP] = 0.5;
+    expectedQs[DIR_PPM] = 0.5;
+    expectedQs[DIR_PMM] = 0.5;
+
+    // no previous BC on this node
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+
+    // node already has BC in MX direction, but this does not change anything
+    grid->addBCalreadySet(SideType::MX);
+    // reset the spy so getQs()[0] belongs to this second call (assuming setQs appends, it would otherwise still be the first result)
+    bc->resetQVector();
+    side.setQs(grid, bc, index);
+    actualQs = bc->getQs()[0];
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenGeometryBCInVector_thenSetPX_throws)
+{
+    // GEOMETRY must not be put into the already-set list, as it has an invalid normal;
+    // doing so anyway is expected to make setQs throw std::out_of_range.
+    grid->addBCalreadySet(SideType::GEOMETRY);
+    EXPECT_THROW(side.setQs(grid, bc, index), std::out_of_range);
+}
+
+// PX alone in 3D: all nine DIR_P?? entries are expected to be 0.5, the remaining ones stay -1.
+TEST_F(SideTestBC, setQs3D_whenSettingPX_setAllQsNormalToBC)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PP0] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after PY: PP0, PPP and PPM are expected to stay -1; the other six DIR_P?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after MY: PM0, PMP and PMM are expected to stay -1; the other six DIR_P?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PP0] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after PZ: P0P, PPP and PMP are expected to stay -1; the other six DIR_P?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPZhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PP0] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PZ);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after MZ: P0M, PPM and PMM are expected to stay -1; the other six DIR_P?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMZhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PP0] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MZ);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after PY and MZ: only P00, PM0, P0P and PMP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandMZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MZ);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after PY and PZ: only P00, PM0, P0M and PMM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandPZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PM0] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PZ);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after MY and PZ: only P00, PP0, P0M and PPM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandPZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PP0] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PZ);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PX after MY and MZ: only P00, PP0, P0P and PPP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandMZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_P00] = 0.5;
+    wantedQs[DIR_PP0] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MZ);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX alone in 3D: all nine DIR_M?? entries are expected to be 0.5, the remaining ones stay -1.
+TEST_F(SideTestBC, setQs3D_whenSettingMX_setAllQsNormalToBC)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MP0] = 0.5;
+    wantedQs[DIR_MM0] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after PY: MP0, MPP and MPM are expected to stay -1; the other six DIR_M?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MM0] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after MY: MM0, MMP and MMM are expected to stay -1; the other six DIR_M?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MP0] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after PZ: M0P, MPP and MMP are expected to stay -1; the other six DIR_M?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPZhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MP0] = 0.5;
+    wantedQs[DIR_MM0] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PZ);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after MZ: M0M, MPM and MMM are expected to stay -1; the other six DIR_M?? entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMZhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MP0] = 0.5;
+    wantedQs[DIR_MM0] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MZ);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after PY and MZ: only M00, MM0, M0P and MMP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandMZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MM0] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MZ);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after PY and PZ: only M00, MM0, M0M and MMM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandPZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MM0] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PZ);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after MY and PZ: only M00, MP0, M0M and MPM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandPZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MP0] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PZ);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MX after MY and MZ: only M00, MP0, M0P and MPP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandMZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_M00] = 0.5;
+    wantedQs[DIR_MP0] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MZ);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ alone in 3D: all nine DIR_??M entries are expected to be 0.5, the remaining ones stay -1.
+TEST_F(SideTestBC, setQs3D_whenSettingMZ_setAllQsNormalToBC)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_0PM] = 0.5;
+    wantedQs[DIR_0MM] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after MY: 0MM, PMM and MMM are expected to stay -1; the other six DIR_??M entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_0PM] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after PY: 0PM, PPM and MPM are expected to stay -1; the other six DIR_??M entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_0MM] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after PX: P0M, PPM and PMM are expected to stay -1; the other six DIR_??M entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPXhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_0PM] = 0.5;
+    wantedQs[DIR_0MM] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PX);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after MX: M0M, MPM and MMM are expected to stay -1; the other six DIR_??M entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_0PM] = 0.5;
+    wantedQs[DIR_0MM] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MX);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after MY and PX: only 00M, M0M, 0PM and MPM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandPXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_0PM] = 0.5;
+    wantedQs[DIR_MPM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PX);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after MY and MX: only 00M, P0M, 0PM and PPM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandMXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_0PM] = 0.5;
+    wantedQs[DIR_PPM] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MX);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after PY and PX: only 00M, M0M, 0MM and MMM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandPXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_M0M] = 0.5;
+    wantedQs[DIR_0MM] = 0.5;
+    wantedQs[DIR_MMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PX);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// MZ after PY and MX: only 00M, P0M, 0MM and PMM are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandMXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00M] = 0.5;
+    wantedQs[DIR_P0M] = 0.5;
+    wantedQs[DIR_0MM] = 0.5;
+    wantedQs[DIR_PMM] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MX);
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ alone in 3D: all nine DIR_??P entries are expected to be 0.5, the remaining ones stay -1.
+TEST_F(SideTestBC, setQs3D_whenSettingPZ_setAllQsNormalToBC)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_0PP] = 0.5;
+    wantedQs[DIR_0MP] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after MY: 0MP, PMP and MMP are expected to stay -1; the other six DIR_??P entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_0PP] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after PY: 0PP, PPP and MPP are expected to stay -1; the other six DIR_??P entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_0MP] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after PX: P0P, PPP and PMP are expected to stay -1; the other six DIR_??P entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenPXhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_0PP] = 0.5;
+    wantedQs[DIR_0MP] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::PX);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after MX: M0P, MPP and MMP are expected to stay -1; the other six DIR_??P entries are 0.5.
+TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_0PP] = 0.5;
+    wantedQs[DIR_0MP] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MX);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after MY and PX: only 00P, M0P, 0PP and MPP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandPXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_0PP] = 0.5;
+    wantedQs[DIR_MPP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PX);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after MY and MX: only 00P, P0P, 0PP and PPP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenMYandMXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_0PP] = 0.5;
+    wantedQs[DIR_PPP] = 0.5;
+
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MX);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after PY and PX: only 00P, M0P, 0MP and MMP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandPXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_M0P] = 0.5;
+    wantedQs[DIR_0MP] = 0.5;
+    wantedQs[DIR_MMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PX);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
+
+// PZ after PY and MX: only 00P, P0P, 0MP and PMP are expected to be 0.5.
+TEST_F(SideTestBC, setQs3D_givenPYandMXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    std::vector<real> wantedQs(27, -1);
+    wantedQs[DIR_00P] = 0.5;
+    wantedQs[DIR_P0P] = 0.5;
+    wantedQs[DIR_0MP] = 0.5;
+    wantedQs[DIR_PMP] = 0.5;
+
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MX);
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    side.setQs(grid, bc, index);
+    const auto storedQs = bc->getQs()[0];
+    EXPECT_THAT(storedQs, testing::Eq(wantedQs));
+}
diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h
index 3f28120a5d969fcc5d7b2a3402a2169ff97c0cc3..ad2ce473fb65fe4414f6da5c4caf0d3e140b7e02 100644
--- a/src/gpu/GridGenerator/grid/Grid.h
+++ b/src/gpu/GridGenerator/grid/Grid.h
@@ -47,6 +47,7 @@ struct Triangle;
 class GridInterface;
 class Object;
 class BoundingBox;
+enum class SideType;
 
 class GRIDGENERATOR_EXPORT Grid
 {
@@ -84,6 +85,8 @@ public:
     virtual void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf) const = 0;
     virtual bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const = 0;
 
+    virtual bool isStopperForBC(uint index) const = 0;
+
     virtual int *getNeighborsX() const = 0;
     virtual int *getNeighborsY() const = 0;
     virtual int *getNeighborsZ() const = 0;
@@ -133,9 +136,9 @@ public:
     virtual void setPeriodicityY(bool periodicity) = 0;
     virtual void setPeriodicityZ(bool periodicity) = 0;
 
-    virtual bool getPeriodicityX() = 0;
-    virtual bool getPeriodicityY() = 0;
-    virtual bool getPeriodicityZ() = 0;
+    virtual bool getPeriodicityX() const = 0;
+    virtual bool getPeriodicityY() const = 0;
+    virtual bool getPeriodicityZ() const = 0;
 
     virtual void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall) = 0;
 
@@ -170,6 +173,11 @@ public:
 
     virtual void repairCommunicationIndices(int direction) = 0;
 
+    virtual bool nodeHasBC(uint index) const = 0;
+
+    virtual std::vector<SideType> getBCAlreadySet() = 0;
+    virtual void addBCalreadySet(SideType side) = 0;
+
     // needed for CUDA Streams 
     virtual void findFluidNodeIndices(bool onlyBulk) = 0;
     virtual uint getNumberOfFluidNodes() const = 0;
@@ -178,6 +186,20 @@ public:
     virtual void findFluidNodeIndicesBorder() = 0;
     virtual uint getNumberOfFluidNodesBorder() const = 0;
     virtual void getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const = 0;
+
+    virtual void addFluidNodeIndicesMacroVars(std::vector<uint> _fluidNodeIndicesMacroVars) = 0;
+    virtual void addFluidNodeIndicesApplyBodyForce(std::vector<uint> _fluidNodeIndicesApplyBodyForce) = 0;
+    virtual void addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures) = 0;
+    virtual void sortFluidNodeIndicesMacroVars() = 0;
+    virtual void sortFluidNodeIndicesApplyBodyForce() = 0;
+    virtual void sortFluidNodeIndicesAllFeatures() = 0;
+
+    virtual uint getNumberOfFluidNodeIndicesMacroVars() const = 0;
+    virtual uint getNumberOfFluidNodeIndicesApplyBodyForce() const = 0;
+    virtual uint getNumberOfFluidNodeIndicesAllFeatures() const = 0; 
+    virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars) const = 0;
+    virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce) const = 0;
+    virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures) const = 0;
 };
 
 #endif
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
index 739aef59f76a33fa67d472a77ef258469f5e411c..f3d850384816f6690e5ffc158bbdc5e1df0ab328 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
@@ -54,6 +54,7 @@ class GridWrapper;
 class Transformator;
 class ArrowTransformator;
 class PolyDataWriterWrapper;
+class TransientBCInputFileReader;
 
 class BoundingBox;
 class Grid;
@@ -113,6 +114,15 @@ public:
     virtual void getPressureValues(real *rho, int *indices, int *neighborIndices, int level) const = 0;
     virtual void getPressureQs(real *qs[27], int level) const                                      = 0;
 
+    virtual uint getPrecursorSize(int level) const              = 0;
+    virtual void getPrecursorValues(uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM, 
+                                    real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM, 
+                                    int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, 
+                                    int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, 
+                                    real& velocityX, real& velocityY, real& velocityZ, int level) const = 0;
+
+    virtual void getPrecursorQs(real* qs[27], int level) const  = 0;
+
     virtual uint getGeometrySize(int level) const                                 = 0;
     virtual void getGeometryIndices(int *indices, int level) const                = 0;
     virtual void getGeometryQs(real *qs[27], int level) const                     = 0;
@@ -136,6 +146,21 @@ public:
     virtual void getReceiveIndices(int *sendIndices, int direction, int level) = 0;
 
     virtual void findFluidNodes(bool splitDomain) = 0;
+
+    virtual void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level)           = 0;
+    virtual void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) = 0;
+    virtual void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level)       = 0;
+    virtual void sortFluidNodeIndicesMacroVars(uint level) = 0;
+    virtual void sortFluidNodeIndicesApplyBodyForce(uint level) = 0;
+    virtual void sortFluidNodeIndicesAllFeatures(uint level) = 0;
+    virtual uint getNumberOfFluidNodesMacroVars(uint level) const = 0;
+    virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, int level) const = 0;
+    virtual uint getNumberOfFluidNodesApplyBodyForce(uint level) const = 0;
+    virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, int level) const = 0;
+    virtual uint getNumberOfFluidNodesAllFeatures(uint level) const = 0;
+    virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, int level) const = 0;
+
+
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
index 083b9a51e0b151f49922df456e968c4b204e4af7..003e6dcd223d2bf019c83f71349a9a7bec84efdc 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -52,6 +52,8 @@
 #include "io/QLineWriter.h"
 #include "io/SimulationFileWriter/SimulationFileWriter.h"
 
+#include "TransientBCSetter/TransientBCSetter.h"
+
 #include "utilities/communication.h"
 #include "utilities/transformator/ArrowTransformator.h"
 
@@ -103,28 +105,33 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma
 
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-		if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
-		{
-			boundaryConditions[level]->geometryBoundaryCondition->normalX = normalX;
-			boundaryConditions[level]->geometryBoundaryCondition->normalY = normalY;
-			boundaryConditions[level]->geometryBoundaryCondition->normalZ = normalZ;
-			boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
+        if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
+        {
+            boundaryConditions[level]->geometryBoundaryCondition->normalX = normalX;
+            boundaryConditions[level]->geometryBoundaryCondition->normalY = normalY;
+            boundaryConditions[level]->geometryBoundaryCondition->normalZ = normalZ;
+            boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
 
             boundaryConditions[level]->geometryBoundaryCondition->fillSlipNormalLists();
 
             *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
-		}
+        }
     }
 }
 
-void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType, 
-                                                    real nomalX, real normalY, real normalZ, 
-                                                    uint samplingOffset, real z0)
+//=======================================================================================
+//! \brief Set stress boundary condition using iMEM
+//! \param samplingOffset number of grid points above boundary where velocity for wall model is sampled
+//! \param z0 roughness length [m]
+//! \param dx dx of level 0 [m]
+//!
+void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType,
+                                                    real nomalX, real normalY, real normalZ,
+                                                    uint samplingOffset, real z0, real dx)
 {
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-        SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0);
-
+        SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0*pow(2.0f,level)/dx);
         auto side = SideFactory::make(sideType);
 
         stressBoundaryCondition->side = side;
@@ -171,17 +178,17 @@ void LevelGridBuilder::setVelocityGeometryBoundaryCondition(real vx, real vy, re
 
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-		if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
-		{
-			boundaryConditions[level]->geometryBoundaryCondition->vx = vx;
-			boundaryConditions[level]->geometryBoundaryCondition->vy = vy;
-			boundaryConditions[level]->geometryBoundaryCondition->vz = vz;
-			boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
+        if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
+        {
+            boundaryConditions[level]->geometryBoundaryCondition->vx = vx;
+            boundaryConditions[level]->geometryBoundaryCondition->vy = vy;
+            boundaryConditions[level]->geometryBoundaryCondition->vz = vz;
+            boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
 
             boundaryConditions[level]->geometryBoundaryCondition->fillVelocityLists();
 
             *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Velocity BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
-		}
+        }
     }
 }
 
@@ -223,7 +230,7 @@ void LevelGridBuilder::setNoSlipBoundaryCondition(SideType sideType)
             noSlipBoundaryCondition->fillVelocityLists();
 
             // now effectively just a wrapper for velocityBC with zero velocity. No distinction in Gridgenerator.
-            boundaryConditions[level]->velocityBoundaryConditions.push_back(noSlipBoundaryCondition); 
+            boundaryConditions[level]->velocityBoundaryConditions.push_back(noSlipBoundaryCondition);
         }
     }
 }
@@ -234,12 +241,45 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition()
 
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-		if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
-		{
-			boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
+        if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
+        {
+            boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
 
             *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry No-Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
-		}
+        }
+    }
+}
+
+void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads,
+                                                        real velocityX, real velocityY, real velocityZ, std::vector<uint> fileLevelToGridLevelMap)
+{ // Creates one precursor BC per grid level on the given side and appends it to that level's precursorBoundaryConditions.
+    if(fileLevelToGridLevelMap.empty())
+    {
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Mapping precursor file levels to the corresponding grid levels" << "\n";
+
+        for (uint level = 0; level < getNumberOfGridLevels(); level++)
+            fileLevelToGridLevelMap.push_back(level); // default: identity mapping, entry i holds i
+    }
+    else
+    {
+        if(fileLevelToGridLevelMap.size()!=getNumberOfGridLevels()) // a user-supplied map must have exactly one entry per grid level
+            throw std::runtime_error("In setPrecursorBoundaryCondition: fileLevelToGridLevelMap does not match with the number of levels");
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Using user defined file to grid level mapping"  << "\n";
+    }
+
+    for (uint level = 0; level < getNumberOfGridLevels(); level++)
+    {
+        auto reader = createReaderForCollection(fileCollection, fileLevelToGridLevelMap[level]); // the map is indexed by grid level; its entry is handed to the reader factory
+        SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ);
+
+        auto side = SideFactory::make(sideType);
+
+        precursorBoundaryCondition->side = side;
+        precursorBoundaryCondition->side->addIndices(grids, level, precursorBoundaryCondition); // NOTE(review): presumably fills precursorBoundaryCondition->indices (size logged below) - confirm
+
+        boundaryConditions[level]->precursorBoundaryConditions.push_back(precursorBoundaryCondition);
+
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Precursor BC on level " << level << " with " << (int)precursorBoundaryCondition->indices.size() << "\n";
     }
 }
 
@@ -373,9 +413,9 @@ std::shared_ptr<Grid> LevelGridBuilder::getGrid(int level, int box)
 void LevelGridBuilder::checkLevel(int level)
 {
     if (level >= (int)grids.size())
-    { 
+    {
         std::cout << "wrong level input... return to caller\n";
-        return; 
+        return;
     }
 }
 
@@ -386,16 +426,16 @@ void LevelGridBuilder::getDimensions(int &nx, int &ny, int &nz, const int level)
     nz = grids[level]->getNumberOfNodesZ();
 }
 
-void LevelGridBuilder::getNodeValues(real *xCoords, real *yCoords, real *zCoords, 
-                                     uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, 
+void LevelGridBuilder::getNodeValues(real *xCoords, real *yCoords, real *zCoords,
+                                     uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative,
                                      uint *geo, const int level) const
 {
     grids[level]->getNodeValues(xCoords, yCoords, zCoords, neighborX, neighborY, neighborZ, neighborNegative, geo);
 }
 
 
-GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndices(uint *fluidNodeIndices, const int level) const 
-{ 
+GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndices(uint *fluidNodeIndices, const int level) const
+{
     grids[level]->getFluidNodeIndices(fluidNodeIndices);
 }
 
@@ -404,9 +444,9 @@ GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndicesBorder(uint *flui
     grids[level]->getFluidNodeIndicesBorder(fluidNodeIndices);
 }
 
-uint LevelGridBuilder::getNumberOfFluidNodes(unsigned int level) const 
+uint LevelGridBuilder::getNumberOfFluidNodes(unsigned int level) const
 {
-    return grids[level]->getNumberOfFluidNodes(); 
+    return grids[level]->getNumberOfFluidNodes();
 }
 
 GRIDGENERATOR_EXPORT uint LevelGridBuilder::getNumberOfFluidNodesBorder(unsigned int level) const
@@ -432,7 +472,7 @@ void LevelGridBuilder::getSlipValues(real* normalX, real* normalY, real* normalZ
         for (uint index = 0; index < boundaryCondition->indices.size(); index++)
         {
             indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[index]) + 1;
-            
+
             normalX[allIndicesCounter] = boundaryCondition->getNormalx(index);
             normalY[allIndicesCounter] = boundaryCondition->getNormaly(index);
             normalZ[allIndicesCounter] = boundaryCondition->getNormalz(index);
@@ -467,9 +507,9 @@ uint LevelGridBuilder::getStressSize(int level) const
     return size;
 }
 
-void LevelGridBuilder::getStressValues( real* normalX, real* normalY, real* normalZ, 
-                                        real* vx,      real* vy,      real* vz, 
-                                        real* vx1,     real* vy1,     real* vz1, 
+void LevelGridBuilder::getStressValues( real* normalX, real* normalY, real* normalZ,
+                                        real* vx,      real* vy,      real* vz,
+                                        real* vx1,     real* vy1,     real* vz1,
                                         int* indices, int* samplingIndices, int* samplingOffset, real* z0, int level) const
 {
 
@@ -525,7 +565,7 @@ void LevelGridBuilder::getVelocityValues(real* vx, real* vy, real* vz, int* indi
     {
         for (uint i = 0; i < (uint)boundaryCondition->indices.size(); i++)
         {
-            indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) +1;  
+            indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) +1;
 
             vx[allIndicesCounter] = boundaryCondition->getVx(i);
             vy[allIndicesCounter] = boundaryCondition->getVy(i);
@@ -594,11 +634,91 @@ void LevelGridBuilder::getPressureQs(real* qs[27], int level) const
     }
 }
 
+uint LevelGridBuilder::getPrecursorSize(int level) const
+{ // Returns the total number of node indices held by all precursor BCs on the given level.
+    uint size = 0;
+    for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) // level is not range-checked here
+    {
+        size += uint(boundaryCondition->indices.size());
+    }
+    return size; // 0 when no precursor BC is registered on this level
+}
+
+void LevelGridBuilder::getPrecursorValues(  uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM,
+                                            real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM,
+                                            int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader,
+                                            int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads,
+                                            real& velocityX, real& velocityY, real& velocityZ, int level) const
+{ // Collects node indices, reader neighbor/weight arrays and shared settings of all precursor BCs on this level; throws std::runtime_error on inconsistent or missing settings.
+    int allIndicesCounter = 0; // running write position in indices[], continues across BCs
+    int allNodesCounter = 0; // sum of getNPointsRead() over all readers
+    uint tmpTimeStepsBetweenReads = 0; // 0 doubles as "no value recorded yet"
+    size_t tmpNumberOfQuantities = 0; // 0 doubles as "no value recorded yet"
+
+    for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions)
+    {
+        if( tmpTimeStepsBetweenReads == 0 ) // adopt this BC's value while none has been recorded
+            tmpTimeStepsBetweenReads = boundaryCondition->timeStepsBetweenReads;
+        if( tmpTimeStepsBetweenReads != boundaryCondition->timeStepsBetweenReads ) // every later BC must agree with the recorded value
+            throw std::runtime_error("All precursor boundary conditions must have the same timeStepsBetweenReads value");
+        auto BCreader = boundaryCondition->getReader();
+        BCreader->setWritingOffset(allIndicesCounter); // offset = number of indices written by the preceding BCs
+        reader.push_back(BCreader); // readers are appended; the caller's vector is not cleared first
+
+        std::vector<real> y, z; // y/z coordinates of this BC's nodes, handed to the reader below
+        real xTmp, yTmp, zTmp; // xTmp is filled by transIndexToCoords but not used afterwards
+        for(uint i = 0; i<boundaryCondition->indices.size(); i++)
+        {
+            indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) + 1; // stored value is the sparse index plus one
+            grids[level]->transIndexToCoords(boundaryCondition->indices[i], xTmp, yTmp, zTmp);
+            y.push_back(yTmp);
+            z.push_back(zTmp);
+            allIndicesCounter++;
+        }
+        BCreader->fillArrays(y, z);
+        BCreader->getNeighbors(neighbor0PP, neighbor0PM, neighbor0MP, neighbor0MM); // NOTE(review): base pointers are passed unshifted; presumably the reader applies the writing offset set above - confirm
+        BCreader->getWeights(weights0PP, weights0PM, weights0MP, weights0MM); // NOTE(review): same assumption about the writing offset as for getNeighbors - confirm
+        if(tmpNumberOfQuantities == 0) // adopt this reader's value while none has been recorded
+            tmpNumberOfQuantities = BCreader->getNumberOfQuantities();
+        if(tmpNumberOfQuantities != BCreader->getNumberOfQuantities()) // every later reader must agree with the recorded value
+            throw std::runtime_error("All precursor files must have the same quantities.");
+        allNodesCounter += BCreader->getNPointsRead();
+        velocityX = boundaryCondition->getVelocityX(); // overwritten per BC: the last BC on this level determines the returned velocity
+        velocityY = boundaryCondition->getVelocityY();
+        velocityZ = boundaryCondition->getVelocityZ();
+    }
+    numberOfPrecursorNodes = allNodesCounter;
+
+    if (tmpTimeStepsBetweenReads == 0) // also reached when no precursor BC exists on this level
+        throw std::runtime_error("timeStepsBetweenReads of precursor needs to be larger than 0.");
+    timeStepsBetweenReads = tmpTimeStepsBetweenReads;
+
+    if (tmpNumberOfQuantities == 0) // output parameter is only written after this check passes
+        throw std::runtime_error("Number of quantities in precursor needs to be larger than 0.");
+    numberOfQuantities = tmpNumberOfQuantities;
+}
+
+void LevelGridBuilder::getPrecursorQs(real* qs[27], int level) const
+{ // Copies the per-node q values of all precursor BCs on this level into qs[dir][node].
+    int allIndicesCounter = 0; // running output position, continues across BCs
+    for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions)
+    {
+        for ( uint index = 0; index < boundaryCondition->indices.size(); index++ )
+        {
+            for (int dir = 0; dir <= grids[level]->getEndDirection(); dir++) // inclusive bound: directions 0..getEndDirection()
+            {
+                qs[dir][allIndicesCounter] = boundaryCondition->qs[index][dir]; // BC stores qs as [node][dir]; output is [dir][node]
+            }
+            allIndicesCounter++;
+        }
+    }
+}
+
 uint LevelGridBuilder::getGeometrySize(int level) const
 {
     if (boundaryConditions[level]->geometryBoundaryCondition)
         return  (uint)boundaryConditions[level]->geometryBoundaryCondition->indices.size();
-    
+
     return 0;
 }
 
@@ -619,9 +739,9 @@ void LevelGridBuilder::getGeometryValues(real* vx, real* vy, real* vz, int level
 {
     for (uint i = 0; i < boundaryConditions[level]->geometryBoundaryCondition->indices.size(); i++)
     {
-		vx[i] = boundaryConditions[level]->geometryBoundaryCondition->getVx(i);
-		vy[i] = boundaryConditions[level]->geometryBoundaryCondition->getVy(i);
-		vz[i] = boundaryConditions[level]->geometryBoundaryCondition->getVz(i);
+        vx[i] = boundaryConditions[level]->geometryBoundaryCondition->getVx(i);
+        vy[i] = boundaryConditions[level]->geometryBoundaryCondition->getVy(i);
+        vz[i] = boundaryConditions[level]->geometryBoundaryCondition->getVz(i);
     }
 }
 
@@ -636,7 +756,7 @@ void LevelGridBuilder::getGeometryQs(real* qs[27], int level) const
     }
 }
 
-void LevelGridBuilder::writeArrows(std::string fileName) const 
+void LevelGridBuilder::writeArrows(std::string fileName) const
 {
     QLineWriter::writeArrows(fileName, boundaryConditions[getNumberOfGridLevels() - 1]->geometryBoundaryCondition, grids[getNumberOfGridLevels() - 1]);
 }
@@ -674,4 +794,65 @@ void LevelGridBuilder::findFluidNodes(bool splitDomain)
     for (uint i = 0; i < grids.size(); i++)
         grids[i]->findFluidNodeIndices(splitDomain);
     *logging::out << logging::Logger::INFO_HIGH << "Done with findFluidNodes()\n";
-}
\ No newline at end of file
+}
+
+
+void LevelGridBuilder::addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level)
+{ // Forwards the index list to the grid of the given level; level is not range-checked here.
+    grids[level]->addFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars);
+}
+
+void LevelGridBuilder::addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level)
+{ // Forwards the index list to the grid of the given level; level is not range-checked here.
+    grids[level]->addFluidNodeIndicesApplyBodyForce(fluidNodeIndicesApplyBodyForce);
+}
+
+void LevelGridBuilder::addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level)
+{ // Forwards the index list to the grid of the given level; level is not range-checked here.
+    grids[level]->addFluidNodeIndicesAllFeatures(fluidNodeIndicesAllFeatures);
+}
+
+void LevelGridBuilder::sortFluidNodeIndicesMacroVars(uint level)
+{ // Delegates to the grid of the given level; level is not range-checked here.
+    grids[level]->sortFluidNodeIndicesMacroVars();
+}
+
+void LevelGridBuilder::sortFluidNodeIndicesApplyBodyForce(uint level)
+{ // Delegates to the grid of the given level; level is not range-checked here.
+    grids[level]->sortFluidNodeIndicesApplyBodyForce();
+}
+
+void LevelGridBuilder::sortFluidNodeIndicesAllFeatures(uint level)
+{ // Delegates to the grid of the given level; level is not range-checked here.
+    grids[level]->sortFluidNodeIndicesAllFeatures();
+}
+
+uint LevelGridBuilder::getNumberOfFluidNodesMacroVars(unsigned int level) const
+{ // Returns the count reported by the grid of the given level; level is not range-checked here.
+    return grids[level]->getNumberOfFluidNodeIndicesMacroVars();
+}
+
+void LevelGridBuilder::getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const
+{ // Passes the caller's output buffer to the grid of the given level; level is not range-checked here.
+    grids[level]->getFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars);
+}
+
+uint LevelGridBuilder::getNumberOfFluidNodesApplyBodyForce(unsigned int level) const
+{ // Returns the count reported by the grid of the given level; level is not range-checked here.
+    return grids[level]->getNumberOfFluidNodeIndicesApplyBodyForce();
+}
+
+void LevelGridBuilder::getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, const int level) const
+{ // Passes the caller's output buffer to the grid of the given level; level is not range-checked here.
+    grids[level]->getFluidNodeIndicesApplyBodyForce(fluidNodeIndicesApplyBodyForce);
+}
+
+uint LevelGridBuilder::getNumberOfFluidNodesAllFeatures(unsigned int level) const
+{ // Returns the count reported by the grid of the given level; level is not range-checked here.
+    return grids[level]->getNumberOfFluidNodeIndicesAllFeatures();
+}
+
+void LevelGridBuilder::getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, const int level) const
+{ // Passes the caller's output buffer to the grid of the given level; level is not range-checked here.
+    grids[level]->getFluidNodeIndicesAllFeatures(fluidNodeIndicesAllFeatures);
+}
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index afb027fc1665ab874523bf39ec2a05518d28f7a1..2e0eaf13080c46260de2a0c845fbf784a2cc3e09 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -38,6 +38,8 @@
 #include <memory>
 #include <array>
 
+#include <lbm/constants/NumericConstants.h>
+
 #include "gpu/GridGenerator/global.h"
 
 #include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
@@ -45,6 +47,8 @@
 #include "gpu/GridGenerator/grid/GridInterface.h"
 #include "gpu/GridGenerator/grid/NodeValues.h"
 
+using namespace vf::lbm::constant;
+
 struct Vertex;
 class  Grid;
 class Transformator;
@@ -58,9 +62,11 @@ class SlipBoundaryCondition;
 class StressBoundaryCondition;
 class PressureBoundaryCondition;
 class GeometryBoundaryCondition;
+class PrecursorBoundaryCondition;
 enum class SideType;
 
-
+class TransientBCInputFileReader;
+class FileCollection;
 
 class LevelGridBuilder : public GridBuilder
 {
@@ -75,11 +81,14 @@ public:
     GRIDGENERATOR_EXPORT  ~LevelGridBuilder() override;
 
     GRIDGENERATOR_EXPORT void setSlipBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ);
-    GRIDGENERATOR_EXPORT void setStressBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ, uint samplingOffset, real z0);
+    GRIDGENERATOR_EXPORT void setStressBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ, uint samplingOffset, real z0, real dx);
     GRIDGENERATOR_EXPORT void setVelocityBoundaryCondition(SideType sideType, real vx, real vy, real vz);
     GRIDGENERATOR_EXPORT void setPressureBoundaryCondition(SideType sideType, real rho);
     GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z);
     GRIDGENERATOR_EXPORT void setNoSlipBoundaryCondition(SideType sideType);
+    GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads,
+                                                            real velocityX=c0o1, real velocityY=c0o1, real velocityZ=c0o1,
+                                                            std::vector<uint> fileLevelToGridLevelMap = {});
 
     GRIDGENERATOR_EXPORT void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall);
 
@@ -97,7 +106,7 @@ public:
     GRIDGENERATOR_EXPORT virtual void getFluidNodeIndicesBorder(uint *fluidNodeIndices, const int level) const override;
 
     GRIDGENERATOR_EXPORT virtual void getNodeValues(real *xCoords, real *yCoords, real *zCoords,
-                                         uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, 
+                                         uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative,
                                          uint *geo, const int level) const override;
     GRIDGENERATOR_EXPORT virtual void getDimensions(int &nx, int &ny, int &nz, const int level) const override;
 
@@ -107,12 +116,12 @@ public:
     GRIDGENERATOR_EXPORT virtual void getSlipQs(real* qs[27], int level) const override;
 
     GRIDGENERATOR_EXPORT uint getStressSize(int level) const override;
-    GRIDGENERATOR_EXPORT virtual void getStressValues(  real* normalX, real* normalY, real* normalZ, 
-                                                        real* vx,      real* vy,      real* vz, 
-                                                        real* vx1,     real* vy1,     real* vz1, 
+    GRIDGENERATOR_EXPORT virtual void getStressValues(  real* normalX, real* normalY, real* normalZ,
+                                                        real* vx,      real* vy,      real* vz,
+                                                        real* vx1,     real* vy1,     real* vz1,
                                                         int* indices, int* samplingIndices, int* samplingOffsets, real* z0, int level) const override;
     GRIDGENERATOR_EXPORT virtual void getStressQs(real* qs[27], int level) const override;
-        
+
     GRIDGENERATOR_EXPORT uint getVelocitySize(int level) const override;
     GRIDGENERATOR_EXPORT virtual void getVelocityValues(real* vx, real* vy, real* vz, int* indices, int level) const override;
     GRIDGENERATOR_EXPORT virtual void getVelocityQs(real* qs[27], int level) const override;
@@ -121,6 +130,14 @@ public:
     GRIDGENERATOR_EXPORT void getPressureValues(real* rho, int* indices, int* neighborIndices, int level) const override;
     GRIDGENERATOR_EXPORT virtual void getPressureQs(real* qs[27], int level) const override;
 
+    GRIDGENERATOR_EXPORT uint getPrecursorSize(int level) const override;
+    GRIDGENERATOR_EXPORT void getPrecursorValues(   uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM,
+                                                    real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM,
+                                                    int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader,
+                                                    int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads,
+                                                    real& velocityX, real& velocityY, real& velocityZ, int level) const override;
+    GRIDGENERATOR_EXPORT virtual void getPrecursorQs(real* qs[27], int level) const override;
+
     GRIDGENERATOR_EXPORT virtual void getGeometryQs(real *qs[27], int level) const override;
     GRIDGENERATOR_EXPORT virtual uint getGeometrySize(int level) const override;
     GRIDGENERATOR_EXPORT virtual void getGeometryIndices(int *indices, int level) const override;
@@ -133,11 +150,11 @@ public:
     GRIDGENERATOR_EXPORT SPtr<GeometryBoundaryCondition> getGeometryBoundaryCondition(uint level) const override;
 
 protected:
-    
+
 
     struct BoundaryConditions
     {
-		BoundaryConditions() = default;
+        BoundaryConditions() = default;
 
         std::vector<SPtr<SlipBoundaryCondition>> slipBoundaryConditions;
 
@@ -149,13 +166,15 @@ protected:
 
         std::vector<SPtr<VelocityBoundaryCondition>> noSlipBoundaryConditions;
 
+        std::vector<SPtr<PrecursorBoundaryCondition>> precursorBoundaryConditions;
+
         SPtr<GeometryBoundaryCondition> geometryBoundaryCondition;
     };
     bool geometryHasValues = false;
 
     std::vector<std::shared_ptr<Grid> > grids;
     std::vector<SPtr<BoundaryConditions> > boundaryConditions;
-    
+
     std::array<uint, 6> communicationProcesses;
 
     void checkLevel(int level);
@@ -194,7 +213,21 @@ public:
 
     // needed for CUDA Streams MultiGPU (Communication Hiding)
     void findFluidNodes(bool splitDomain) override;
+
+    void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) override;
+    void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) override;
+    void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) override;
+
+    void sortFluidNodeIndicesMacroVars(uint level) override;
+    void sortFluidNodeIndicesApplyBodyForce(uint level) override;
+    void sortFluidNodeIndicesAllFeatures(uint level) override;
+
+    uint getNumberOfFluidNodesMacroVars(unsigned int level) const override;
+    void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const override;
+    uint getNumberOfFluidNodesApplyBodyForce(unsigned int level) const override;
+    void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, const int level) const override;
+    uint getNumberOfFluidNodesAllFeatures(unsigned int level) const override;
+    void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, const int level) const override;
 };
 
 #endif
-
diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp
index 31bbf3ddc87184846fcb01a3e6631358b6a6f864..32cf9d07da87149695a5bf548ed357be2b2f71b4 100644
--- a/src/gpu/GridGenerator/grid/GridImp.cpp
+++ b/src/gpu/GridGenerator/grid/GridImp.cpp
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -33,7 +33,6 @@
 #include "GridImp.h"
 
 #include <iostream>
-#include <omp.h>
 #include <sstream>
 # include <algorithm>
 #include <cmath>
@@ -61,8 +60,8 @@ int DIRECTIONS[DIR_END_MAX][DIMENSION];
 
 using namespace vf::gpu;
 
-GridImp::GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level) 
-            : object(object), 
+GridImp::GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level)
+            : object(object),
     startX(startX),
     startY(startY),
     startZ(startZ),
@@ -135,7 +134,7 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->initalNodeToOutOfGrid(index);
-    
+
     if( this->innerRegionFromFinerGrid ){
         *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start setInnerBasedOnFinerGrid()\n";
         this->setInnerBasedOnFinerGrid(fineGrid);
@@ -147,12 +146,12 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
 
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start addOverlap()\n";
     this->addOverlap();
-    
+
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixOddCells()\n";
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->fixOddCell(index);
-    
+
     if( enableFixRefinementIntoTheWall )
     {
         *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixRefinementIntoWall()\n";
@@ -180,12 +179,12 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
             }
         }
     }
-    
+
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start findEndOfGridStopperNodes()\n";
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->findEndOfGridStopperNode(index);
-    
+
     *logging::out << logging::Logger::INFO_INTERMEDIATE
         << "Grid created: " << "from (" << this->startX << ", " << this->startY << ", " << this->startZ << ") to (" << this->endX << ", " << this->endY << ", " << this->endZ << ")\n"
         << "nodes: " << this->nx << " x " << this->ny << " x " << this->nz << " = " << this->size << "\n";
@@ -209,9 +208,9 @@ void GridImp::freeMemory()
     if( this->neighborIndexZ        != nullptr ) { delete[] this->neighborIndexZ;        this->neighborIndexZ        = nullptr; }
     if( this->neighborIndexNegative != nullptr ) { delete[] this->neighborIndexNegative; this->neighborIndexNegative = nullptr; }
     if( this->sparseIndices         != nullptr ) { delete[] this->sparseIndices;         this->sparseIndices         = nullptr; }
-	if( this->qIndices              != nullptr ) { delete[] this->qIndices;              this->qIndices              = nullptr; }
-	if( this->qValues               != nullptr ) { delete[] this->qValues;               this->qValues               = nullptr; }
-	if( this->qPatches              != nullptr ) { delete[] this->qPatches;              this->qPatches              = nullptr; }
+    if( this->qIndices              != nullptr ) { delete[] this->qIndices;              this->qIndices              = nullptr; }
+    if( this->qValues               != nullptr ) { delete[] this->qValues;               this->qValues               = nullptr; }
+    if( this->qPatches              != nullptr ) { delete[] this->qPatches;              this->qPatches              = nullptr; }
 
     field.freeMemory();
 }
@@ -254,7 +253,7 @@ void GridImp::discretize(Object* solidObject, char innerType, char outerType)
         this->sparseIndices[index] = index;
 
         if( this->getFieldEntry(index) == innerType ) continue;
-        
+
         real x, y, z;
         this->transIndexToCoords(index, x, y, z);
 
@@ -279,7 +278,7 @@ bool GridImp::isInside(const Cell& cell) const
 //    |       +-----+-----+-----+           | +-----+-----+-----+
 //    +---------+                           +---------+
 //               0     1     2                   0     1     2
-//              even      even                        even     
+//              even      even                        even
 //                   odd                        odd         odd
 //
 Cell GridImp::getOddCellFromIndex(uint index) const
@@ -349,7 +348,7 @@ void GridImp::addOverlap()
 void GridImp::setOverlapTmp( uint index )
 {
     if( this->field.is( index, INVALID_OUT_OF_GRID ) ){
-        
+
         if( this->hasNeighborOfType(index, FLUID) ){
             this->field.setFieldEntry( index, OVERLAP_TMP );
         }
@@ -380,7 +379,7 @@ void GridImp::fixRefinementIntoWall(uint xIndex, uint yIndex, uint zIndex, int d
     if(  this->xOddStart && ( dir == 1 || dir == -1 ) && ( xIndex % 2 == 0 && xIndex != 0 ) ) return;
     if(  this->yOddStart && ( dir == 2 || dir == -2 ) && ( yIndex % 2 == 0 && yIndex != 0 ) ) return;
     if(  this->zOddStart && ( dir == 3 || dir == -3 ) && ( zIndex % 2 == 0 && zIndex != 0 ) ) return;
-    
+
     //////////////////////////////////////////////////////////////////////////
 
     real dx{ 0.0 }, dy{ 0.0 }, dz{ 0.0 };
@@ -433,31 +432,31 @@ void GridImp::findStopperNode(uint index) // deprecated
 
 void GridImp::findEndOfGridStopperNode(uint index)
 {
-	if (isValidEndOfGridStopper(index)){
+    if (isValidEndOfGridStopper(index)){
         if( this->level != 0 )
-		    this->field.setFieldEntryToStopperOutOfGrid(index);
+            this->field.setFieldEntryToStopperOutOfGrid(index);
         else
             this->field.setFieldEntryToStopperOutOfGridBoundary(index);
     }
-    
-	if (isValidEndOfGridBoundaryStopper(index))
-		this->field.setFieldEntryToStopperOutOfGridBoundary(index);
+
+    if (isValidEndOfGridBoundaryStopper(index))
+        this->field.setFieldEntryToStopperOutOfGridBoundary(index);
 }
 
 void GridImp::findSolidStopperNode(uint index)
 {
-	if (isValidSolidStopper(index))
-		this->field.setFieldEntry(index, STOPPER_SOLID);
+    if (isValidSolidStopper(index))
+        this->field.setFieldEntry(index, STOPPER_SOLID);
 }
 
 void GridImp::findBoundarySolidNode(uint index)
 {
-	if (shouldBeBoundarySolidNode(index)) 
-	{
-		this->field.setFieldEntry(index, BC_SOLID);
-		this->qIndices[index] = this->numberOfSolidBoundaryNodes++;
-		//grid->setNumberOfSolidBoundaryNodes(grid->getNumberOfSolidBoundaryNodes() + 1);
-	}
+    if (shouldBeBoundarySolidNode(index))
+    {
+        this->field.setFieldEntry(index, BC_SOLID);
+        this->qIndices[index] = this->numberOfSolidBoundaryNodes++;
+        //grid->setNumberOfSolidBoundaryNodes(grid->getNumberOfSolidBoundaryNodes() + 1);
+    }
 }
 
 void GridImp::fixOddCell(uint index)
@@ -483,9 +482,9 @@ bool GridImp::isOutSideOfGrid(Cell &cell) const
 bool GridImp::contains(Cell &cell, char type) const
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
-			continue;
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
+            continue;
         if (field.is(index, type))
             return true;
     }
@@ -495,8 +494,8 @@ bool GridImp::contains(Cell &cell, char type) const
 bool GridImp::cellContainsOnly(Cell &cell, char type) const
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
             return false;
         if (!field.is(index, type))
             return false;
@@ -507,8 +506,8 @@ bool GridImp::cellContainsOnly(Cell &cell, char type) const
 bool GridImp::cellContainsOnly(Cell &cell, char typeA, char typeB) const
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
             return false;
         if (!field.is(index, typeA) && !field.is(index, typeB))
             return false;
@@ -524,91 +523,91 @@ const Object * GridImp::getObject() const
 void GridImp::setNodeTo(Cell &cell, char type)
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
-			continue;
-		field.setFieldEntry(index, type);
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
+            continue;
+        field.setFieldEntry(index, type);
     }
 }
 
 void GridImp::setNodeTo(uint index, char type)
 {
-	if( index != INVALID_INDEX )
-		field.setFieldEntry(index, type);
+    if( index != INVALID_INDEX )
+        field.setFieldEntry(index, type);
 }
 
 bool GridImp::isNode(uint index, char type) const
 {
     if( index != INVALID_INDEX )
-		return field.is(index, type);
+        return field.is(index, type);
 
     throw std::runtime_error("GridImp::isNode() -> index == INVALID_INDEX not supported.");
 }
 
 bool GridImp::isValidEndOfGridStopper(uint index) const
 {
-	// Lenz: also includes corner stopper nodes
-	if (!this->field.is(index, INVALID_OUT_OF_GRID))
-		return false;
+    // Lenz: also includes corner stopper nodes
+    if (!this->field.is(index, INVALID_OUT_OF_GRID))
+        return false;
 
-	return hasNeighborOfType(index, FLUID);
+    return hasNeighborOfType(index, FLUID);
 }
 
 bool GridImp::isValidEndOfGridBoundaryStopper(uint index) const
 {
-	// Lenz: also includes corner stopper nodes
-	if (!this->field.is(index, FLUID))
-		return false;
+    // Lenz: also includes corner stopper nodes
+    if (!this->field.is(index, FLUID))
+        return false;
 
-	return ! hasAllNeighbors(index);
+    return ! hasAllNeighbors(index);
 }
 
 bool GridImp::isValidSolidStopper(uint index) const
 {
-	// Lenz: also includes corner stopper nodes
-	if (!this->field.is(index, INVALID_SOLID))
-		return false;
+    // Lenz: also includes corner stopper nodes
+    if (!this->field.is(index, INVALID_SOLID))
+        return false;
 
-	return hasNeighborOfType(index, FLUID);
+    return hasNeighborOfType(index, FLUID);
 }
 
 bool GridImp::shouldBeBoundarySolidNode(uint index) const
 {
-	if (!this->field.is(index, FLUID))
-		return false;
+    if (!this->field.is(index, FLUID))
+        return false;
 
-	return hasNeighborOfType(index, STOPPER_SOLID);
+    return hasNeighborOfType(index, STOPPER_SOLID);
 }
 
 bool GridImp::hasAllNeighbors(uint index) const
 {
-	// new version by Lenz, utilizes the range based for loop for all directions
-	real x, y, z;
-	this->transIndexToCoords(index, x, y, z);
-	for (const auto dir : this->distribution) {
-		const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
+    // new version by Lenz, utilizes the range based for loop for all directions
+    real x, y, z;
+    this->transIndexToCoords(index, x, y, z);
+    for (const auto dir : this->distribution) {
+        const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
 
-		if (neighborIndex == INVALID_INDEX) return false;
-	}
+        if (neighborIndex == INVALID_INDEX) return false;
+    }
 
-	return true;
+    return true;
 }
 
 bool GridImp::hasNeighborOfType(uint index, char type) const
 {
-	// new version by Lenz, utilizes the range based for loop for all directions
-	real x, y, z;
-	this->transIndexToCoords(index, x, y, z);
-	for (const auto dir : this->distribution) {
-		const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
+    // new version by Lenz, utilizes the range based for loop for all directions
+    real x, y, z;
+    this->transIndexToCoords(index, x, y, z);
+    for (const auto dir : this->distribution) {
+        const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		if (this->field.is(neighborIndex, type))
-			return true;
-	}
+        if (this->field.is(neighborIndex, type))
+            return true;
+    }
 
-	return false;
+    return false;
 }
 
 bool GridImp::nodeInNextCellIs(int index, char type) const
@@ -630,13 +629,13 @@ bool GridImp::nodeInNextCellIs(int index, char type) const
 
     const uint indexXYZ = transCoordToIndex(neighborX, neighborY, neighborZ);
 
-	const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX, type);
-	const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY, type);
-	const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY, type);
-	const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ, type);
-	const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ, type);
-	const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ, type);
-	const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
+    const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX, type);
+    const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY, type);
+    const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY, type);
+    const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ, type);
+    const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ, type);
+    const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ, type);
+    const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
 
     return typeX || typeY || typeXY || typeZ || typeYZ
         || typeXZ || typeXYZ;
@@ -661,13 +660,13 @@ bool GridImp::nodeInPreviousCellIs(int index, char type) const
 
     const uint indexXYZ = transCoordToIndex(neighborX, neighborY, neighborZ);
 
-	const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX  , type);
-	const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY  , type);
-	const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY , type);
-	const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ  , type);
-	const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ , type);
-	const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ , type);
-	const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
+    const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX  , type);
+    const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY  , type);
+    const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY , type);
+    const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ  , type);
+    const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ , type);
+    const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ , type);
+    const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
 
     return typeX || typeY || typeXY || typeZ || typeYZ
         || typeXZ || typeXYZ;
@@ -678,8 +677,8 @@ bool GridImp::nodeInCellIs(Cell& cell, char type) const
     for (const auto node : cell)
     {
         const uint index = transCoordToIndex(node.x, node.y, node.z);
-		if (index == INVALID_INDEX)
-			continue;
+        if (index == INVALID_INDEX)
+            continue;
         if (field.is(index, type))
             return true;
     }
@@ -696,9 +695,9 @@ void GridImp::setCellTo(uint index, char type)
     for (const auto node : cell)
     {
         const uint nodeIndex = transCoordToIndex(node.x, node.y, node.z);
-		if (nodeIndex == INVALID_INDEX)
-			continue;
-		this->field.setFieldEntry(nodeIndex, type);
+        if (nodeIndex == INVALID_INDEX)
+            continue;
+        this->field.setFieldEntry(nodeIndex, type);
     }
 }
 
@@ -712,15 +711,21 @@ void GridImp::setNonStopperOutOfGridCellTo(uint index, char type)
     for (const auto node : cell)
     {
         const uint nodeIndex = transCoordToIndex(node.x, node.y, node.z);
-		if (nodeIndex == INVALID_INDEX)
-			continue;
+        if (nodeIndex == INVALID_INDEX)
+            continue;
 
-        if( this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID && 
+        if( this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID &&
             this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID_BOUNDARY )
             this->field.setFieldEntry(nodeIndex, type);
     }
 }
 
+bool GridImp::nodeHasBC(uint index) const
+{
+    const auto nodeType = getFieldEntry(index); // look the node's type up once, then compare it to each boundary-condition type
+    return nodeType == vf::gpu::BC_PRESSURE || nodeType == vf::gpu::BC_VELOCITY || nodeType == vf::gpu::BC_NOSLIP ||
+           nodeType == vf::gpu::BC_SLIP || nodeType == vf::gpu::BC_STRESS;
+}
 
 void GridImp::setPeriodicity(bool periodicityX, bool periodicityY, bool periodicityZ)
 {
@@ -744,17 +749,17 @@ void GridImp::setPeriodicityZ(bool periodicity)
     this->periodicityZ = periodicity;
 }
 
-bool GridImp::getPeriodicityX()
+bool GridImp::getPeriodicityX() const
 {
     return this->periodicityX;
 }
 
-bool GridImp::getPeriodicityY()
+bool GridImp::getPeriodicityY() const
 {
     return this->periodicityY;
 }
 
-bool GridImp::getPeriodicityZ()
+bool GridImp::getPeriodicityZ() const
 {
     return this->periodicityZ;
 }
@@ -770,7 +775,7 @@ uint GridImp::transCoordToIndex(const real &x, const real &y, const real &z) con
     const uint yIndex = getYIndex(y);
     const uint zIndex = getZIndex(z);
 
-	if (xIndex >= nx || yIndex >= ny || zIndex >= nz)
+    if (xIndex >= nx || yIndex >= ny || zIndex >= nz)
         return INVALID_INDEX;
 
     return xIndex + nx * (yIndex + ny * zIndex);
@@ -819,20 +824,20 @@ TriangularMeshDiscretizationStrategy * GridImp::getTriangularMeshDiscretizationS
 
 uint GridImp::getNumberOfSolidBoundaryNodes() const
 {
-	return this->numberOfSolidBoundaryNodes;
+    return this->numberOfSolidBoundaryNodes;
 }
 
 void GridImp::setNumberOfSolidBoundaryNodes(uint numberOfSolidBoundaryNodes)
 {
-	if (numberOfSolidBoundaryNodes < INVALID_INDEX)
-		this->numberOfSolidBoundaryNodes = numberOfSolidBoundaryNodes;
+    if (numberOfSolidBoundaryNodes < INVALID_INDEX)
+        this->numberOfSolidBoundaryNodes = numberOfSolidBoundaryNodes;
 }
 
 real GridImp::getQValue(const uint index, const uint dir) const
 {
-	const int qIndex = dir * this->numberOfSolidBoundaryNodes + this->qIndices[index];
+    const int qIndex = dir * this->numberOfSolidBoundaryNodes + this->qIndices[index];
 
-	return this->qValues[qIndex];
+    return this->qValues[qIndex];
 }
 
 uint GridImp::getQPatch(const uint index) const
@@ -858,7 +863,7 @@ void GridImp::findSparseIndices(SPtr<Grid> finerGrid)
 {
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Find sparse indices...";
     auto fineGrid = std::static_pointer_cast<GridImp>(finerGrid);
-    
+
     this->updateSparseIndices();
 
 #pragma omp parallel for
@@ -906,7 +911,7 @@ void GridImp::updateSparseIndices()
     sparseSize = size - removedNodes;
 }
 
-void GridImp::findFluidNodeIndices(bool splitDomain) 
+void GridImp::findFluidNodeIndices(bool splitDomain)
 {
     // find sparse index of all fluid nodes
     this->fluidNodeIndices.clear();
@@ -935,7 +940,7 @@ void GridImp::findFluidNodeIndicesBorder() {
     // resize fluidNodeIndicesBorder (for better performance in copy operation)
     size_t newSize = 0;
     for (CommunicationIndices& ci : this->communicationIndices)
-        newSize += ci.sendIndices.size();    
+        newSize += ci.sendIndices.size();
     this->fluidNodeIndicesBorder.reserve(newSize);
 
     // copy all send indices to fluidNodeIndicesBorder
@@ -968,7 +973,7 @@ void GridImp::setNeighborIndices(uint index)
         this->setStopperNeighborCoords(index);
         return;
     }
-     
+
     if (this->sparseIndices[index] == -1)
         return;
 
@@ -1002,9 +1007,9 @@ void GridImp::setStopperNeighborCoords(uint index)
     if (vf::Math::lessEqual(z + delta, endZ + (0.5 * delta)) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x, y, z + delta)))
         neighborIndexZ[index] = getSparseIndex(x, y, z + delta);
 
-    if (vf::Math::greaterEqual(x - delta, endX) && 
-        vf::Math::greaterEqual(y - delta, endY) && 
-        vf::Math::greaterEqual(z - delta, endZ) && 
+    if (vf::Math::greaterEqual(x - delta, endX) &&
+        vf::Math::greaterEqual(y - delta, endY) &&
+        vf::Math::greaterEqual(z - delta, endZ) &&
         !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x - delta, y - delta, z - delta)))
     {
         neighborIndexNegative[index] = getSparseIndex(x - delta, y - delta, z - delta);
@@ -1035,7 +1040,7 @@ real GridImp::getNeighborCoord(bool periodicity, real startCoord, real coords[3]
             return coords[direction] + delta;
 
     }
-    
+
     return coords[direction] + delta;
 }
 
@@ -1061,7 +1066,7 @@ real GridImp::getNegativeNeighborCoord(bool periodicity, real startCoord, real c
 
         return getLastFluidNode(coords, direction, startCoord);
     }
-    
+
     return coords[direction] - delta;
 }
 
@@ -1155,7 +1160,7 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks
             if( lbmOrGks == LBM )
                 tmpSubDomainBox.extend(this->delta);
 
-            if (!tmpSubDomainBox.isInside(x, y, z) 
+            if (!tmpSubDomainBox.isInside(x, y, z)
                 && ( this->getFieldEntry(index) == FLUID ||
                      this->getFieldEntry(index) == FLUID_CFC ||
                      this->getFieldEntry(index) == FLUID_CFF ||
@@ -1184,13 +1189,13 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks
 
 void GridImp::findGridInterfaceCF(uint index, GridImp& finerGrid, LbmOrGks lbmOrGks)
 {
-	if (lbmOrGks == LBM)
-	{
-		gridInterface->findInterfaceCF            (index, this, &finerGrid);
-		gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid);
-	}
-	else if (lbmOrGks == GKS)
-		gridInterface->findInterfaceCF_GKS(index, this, &finerGrid);
+    if (lbmOrGks == LBM)
+    {
+        gridInterface->findInterfaceCF            (index, this, &finerGrid);
+        gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid);
+    }
+    else if (lbmOrGks == GKS)
+        gridInterface->findInterfaceCF_GKS(index, this, &finerGrid);
 }
 
 void GridImp::findGridInterfaceFC(uint index, GridImp& finerGrid)
@@ -1217,16 +1222,16 @@ void GridImp::mesh(Object* object)
     if (triangularMesh)
         triangularMeshDiscretizationStrategy->discretize(triangularMesh, this, INVALID_SOLID, FLUID);
     else
-		//new method for geometric primitives (not cell based) to be implemented
+        //new method for geometric primitives (not cell based) to be implemented
         this->discretize(object, INVALID_SOLID, FLUID);
 
     this->closeNeedleCells();
 
-	#pragma omp parallel for
+    #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->findSolidStopperNode(index);
 
-	//#pragma omp parallel for
+    //#pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++) {
         this->findBoundarySolidNode(index);
     }
@@ -1359,7 +1364,7 @@ void GridImp::findQs(Object* object) //TODO: enable qs for primitive objects
         findQsPrimitive(object);
 }
 
-void GridImp::allocateQs() 
+void GridImp::allocateQs()
 {
     this->qPatches = new uint[this->getNumberOfSolidBoundaryNodes()];
 
@@ -1379,8 +1384,8 @@ void GridImp::findQs(TriangularMesh &triangularMesh)
 
     if( this->qComputationStage == qComputationStageType::ComputeQs )
         allocateQs();
-    
-    
+
+
 #pragma omp parallel for
     for (int i = 0; i < triangularMesh.size; i++)
         this->findQs(triangularMesh.triangles[i]);
@@ -1406,15 +1411,15 @@ void GridImp::findQs(Triangle &triangle)
                 //if (!field.isFluid(index))
                 //    continue;
 
-				if( index == INVALID_INDEX ) continue;
+                if( index == INVALID_INDEX ) continue;
 
                 const Vertex point(x, y, z);
 
                 if( this->qComputationStage == qComputationStageType::ComputeQs ){
                     if(this->field.is(index, BC_SOLID))
                     {
-					    calculateQs(index, point, triangle);
-				    }
+                        calculateQs(index, point, triangle);
+                    }
                 }
                 else if( this->qComputationStage == qComputationStageType::FindSolidBoundaryNodes )
                 {
@@ -1449,14 +1454,14 @@ void GridImp::findQsPrimitive(Object * object)
         real x,y,z;
 
         this->transIndexToCoords(index,x,y,z);
-        
+
         const Vertex point(x, y, z);
 
         if( this->qComputationStage == qComputationStageType::ComputeQs ){
             if(this->field.is(index, BC_SOLID))
             {
-				calculateQs(index, point, object);
-			}
+                calculateQs(index, point, object);
+            }
         }
         else if( this->qComputationStage == qComputationStageType::FindSolidBoundaryNodes )
         {
@@ -1477,66 +1482,66 @@ void GridImp::calculateQs(const uint index, const Vertex &point, Object* object)
 {
     Vertex pointOnTriangle, direction;
 
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
-		direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), 
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
+        direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]),
                             real(distribution.dirs[i * DIMENSION + 1]),
-			                real(distribution.dirs[i * DIMENSION + 2]) );
+                            real(distribution.dirs[i * DIMENSION + 2]) );
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													    point.y + direction.y * this->delta,
-													    point.z + direction.z * this->delta);
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                        point.y + direction.y * this->delta,
+                                                        point.z + direction.z * this->delta);
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
+        error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
                     subdistance < this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] )
-			{
+            {
+
+                this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
 
-				this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
-                    
                 this->qPatches[ this->qIndices[index] ] = 0;
 
-			}
-		}
-	}
+            }
+        }
+    }
 }
 
 bool GridImp::checkIfAtLeastOneValidQ(const uint index, const Vertex &point, Object* object) const
 {
     Vertex pointOnTriangle, direction;
 
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
-		direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), 
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
+        direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]),
                             real(distribution.dirs[i * DIMENSION + 1]),
-			                real(distribution.dirs[i * DIMENSION + 2]) );
+                            real(distribution.dirs[i * DIMENSION + 2]) );
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													 point.y + direction.y * this->delta,
-													 point.z + direction.z * this->delta);
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                     point.y + direction.y * this->delta,
+                                                     point.z + direction.z * this->delta);
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
+        error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			return true;
-		}
-	}
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            return true;
+        }
+    }
     return false;
 }
 
@@ -1565,7 +1570,7 @@ void GridImp::calculateQs(const Vertex &point, const Triangle &triangle) const
 
         error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
         if (error == 0 && subdistance < 1.0 && subdistance > 0.0)
         {
@@ -1577,81 +1582,80 @@ void GridImp::calculateQs(const Vertex &point, const Triangle &triangle) const
 
 void GridImp::calculateQs(const uint index, const Vertex &point, const Triangle &triangle) const
 {
-	Vertex pointOnTriangle, direction;
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
+    Vertex pointOnTriangle, direction;
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
 #if defined(__CUDA_ARCH__)
-		direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
+        direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
 #else
-		direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), 
+        direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]),
                             real(distribution.dirs[i * DIMENSION + 1]),
-			                real(distribution.dirs[i * DIMENSION + 2]) );
+                            real(distribution.dirs[i * DIMENSION + 2]) );
 #endif
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													 point.y + direction.y * this->delta,
-													 point.z + direction.z * this->delta);
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                     point.y + direction.y * this->delta,
+                                                     point.z + direction.z * this->delta);
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
+        error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
                  subdistance < this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] )
-			{
-				this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
+            {
+                this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
 
                 this->qPatches[ this->qIndices[index] ] = triangle.patchIndex;
-			}
-		}
-	}
+            }
+        }
+    }
 }
 
 bool GridImp::checkIfAtLeastOneValidQ(const uint index, const Vertex & point, const Triangle & triangle) const
 {
-	Vertex pointOnTriangle, direction;
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
+    Vertex pointOnTriangle, direction;
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
 #if defined(__CUDA_ARCH__)
-		direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
+        direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
 #else
-		direction = Vertex(real(distribution.dirs[i * DIMENSION + 0]), 
+        direction = Vertex(real(distribution.dirs[i * DIMENSION + 0]),
                            real(distribution.dirs[i * DIMENSION + 1]),
-			               real(distribution.dirs[i * DIMENSION + 2]));
+                           real(distribution.dirs[i * DIMENSION + 2]));
 #endif
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													 point.y + direction.y * this->delta,
-													 point.z + direction.z * this->delta);
-		if (neighborIndex == INVALID_INDEX) continue;
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                     point.y + direction.y * this->delta,
+                                                     point.z + direction.z * this->delta);
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
+        error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			return true;
-		}
-	}
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            return true;
+        }
+    }
     return false;
 }
 
 void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks)
 {
     for( uint index = 0; index < this->size; index++ ){
-        
         real x, y, z;
         this->transIndexToCoords(index, x, y, z);
-    
+
         if( this->getFieldEntry(index) == INVALID_OUT_OF_GRID ||
             this->getFieldEntry(index) == INVALID_SOLID ||
             this->getFieldEntry(index) == INVALID_COARSE_UNDER_FINE ||
@@ -1660,7 +1664,6 @@ void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomai
 
         if( lbmOrGks == LBM && this->getFieldEntry(index) == STOPPER_OUT_OF_GRID_BOUNDARY ) continue;
         if( lbmOrGks == LBM && this->getFieldEntry(index) == STOPPER_SOLID ) continue;
-
         if( direction == CommunicationDirections::MX ) findCommunicationIndex( index, x, subDomainBox->minX, direction);
         if( direction == CommunicationDirections::PX ) findCommunicationIndex( index, x, subDomainBox->maxX, direction);
         if( direction == CommunicationDirections::MY ) findCommunicationIndex( index, y, subDomainBox->minY, direction);
@@ -1672,16 +1675,13 @@ void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomai
 
 void GridImp::findCommunicationIndex( uint index, real coordinate, real limit, int direction ){
     // negative direction get a negative sign
-    real s = ( direction % 2 == 0 ) ? ( -1.0 ) : ( 1.0 );  
-
+    real s = ( direction % 2 == 0 ) ? ( -1.0 ) : ( 1.0 );
 
-	if (std::abs(coordinate - (limit + s * 0.5 * this->delta)) < 0.1 * this->delta) {
-		this->communicationIndices[direction].receiveIndices.push_back(index);
-	}
+    if (std::abs(coordinate - (limit + s * 0.5 * this->delta)) < 0.1 * this->delta)
+        this->communicationIndices[direction].receiveIndices.push_back(index);
 
-	if (std::abs(coordinate - (limit - s * 0.5 * this->delta)) < 0.1 * this->delta) {
-		this->communicationIndices[direction].sendIndices.push_back(index);
-	}
+    if (std::abs(coordinate - (limit - s * 0.5 * this->delta)) < 0.1 * this->delta)
+        this->communicationIndices[direction].sendIndices.push_back(index);
 }
 
 bool GridImp::isSendNode(int index) const
@@ -1727,14 +1727,14 @@ uint GridImp::getReceiveIndex(int direction, uint index)
 
 void GridImp::repairCommunicationIndices(int direction)
 {
-    this->communicationIndices[direction].sendIndices.insert( this->communicationIndices[direction].sendIndices.end(), 
-                                                              this->communicationIndices[direction+1].sendIndices.begin(), 
+    this->communicationIndices[direction].sendIndices.insert( this->communicationIndices[direction].sendIndices.end(),
+                                                              this->communicationIndices[direction+1].sendIndices.begin(),
                                                               this->communicationIndices[direction+1].sendIndices.end() );
 
 
 
-    this->communicationIndices[direction+1].receiveIndices.insert( this->communicationIndices[direction+1].receiveIndices.end(), 
-                                                                 this->communicationIndices[direction].receiveIndices.begin(), 
+    this->communicationIndices[direction+1].receiveIndices.insert( this->communicationIndices[direction+1].receiveIndices.end(),
+                                                                 this->communicationIndices[direction].receiveIndices.begin(),
                                                                  this->communicationIndices[direction].receiveIndices.end() );
 
     this->communicationIndices[direction].receiveIndices = this->communicationIndices[direction+1].receiveIndices;
@@ -1839,19 +1839,19 @@ real GridImp::getMaximumOnNodes(const real &maxExact, const real &decimalStart,
     return maxNode;
 }
 
-uint GridImp::getXIndex(real x) const 
-{ 
-    return std::lround((x - startX) / delta); 
+uint GridImp::getXIndex(real x) const
+{
+    return std::lround((x - startX) / delta);
 }
 
 uint GridImp::getYIndex(real y) const
-{ 
-    return std::lround((y - startY) / delta); 
+{
+    return std::lround((y - startY) / delta);
 }
 
 uint GridImp::getZIndex(real z) const
-{ 
-    return std::lround((z - startZ) / delta); 
+{
+    return std::lround((z - startZ) / delta);
 }
 
 real GridImp::getDelta() const
@@ -1866,11 +1866,11 @@ uint GridImp::getSize() const
 
 uint GridImp::getSparseSize() const
 {
-    return this->sparseSize; 
+    return this->sparseSize;
 }
 
-uint GridImp::getNumberOfFluidNodes() const { 
-    return (uint)this->fluidNodeIndices.size(); 
+uint GridImp::getNumberOfFluidNodes() const {
+    return (uint)this->fluidNodeIndices.size();
 }
 
 Field GridImp::getField() const
@@ -2063,23 +2063,147 @@ void GridImp::getNodeValues(real *xCoords, real *yCoords, real *zCoords, uint *n
     }
 }
 
-void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const 
-{ 
+void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const
+{
     for (uint nodeNumber = 0; nodeNumber < (uint)this->fluidNodeIndices.size(); nodeNumber++)
         fluidNodeIndices[nodeNumber] = this->fluidNodeIndices[nodeNumber];
 }
 
-uint GridImp::getNumberOfFluidNodesBorder() const 
-{ 
-    return (uint)this->fluidNodeIndicesBorder.size(); 
+uint GridImp::getNumberOfFluidNodesBorder() const
+{
+    return (uint)this->fluidNodeIndicesBorder.size();
 }
 
-void GridImp::getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const 
+void GridImp::getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const
 {
     for (uint nodeNumber = 0; nodeNumber < (uint)this->fluidNodeIndicesBorder.size(); nodeNumber++)
         fluidNodeIndicesBorder[nodeNumber] = this->fluidNodeIndicesBorder[nodeNumber];
 }
 
+// Appends the given node indices to fluidNodeIndicesMacroVars; duplicates are not filtered here (sortFluidNodeIndicesMacroVars() removes them).
+void GridImp::addFluidNodeIndicesMacroVars(std::vector<uint> _fluidNodeIndicesMacroVars)
+{
+    // insert() keeps the vector's geometric growth; reserve(exact new size) before every append can force a reallocation on each call.
+    this->fluidNodeIndicesMacroVars.insert(this->fluidNodeIndicesMacroVars.end(), _fluidNodeIndicesMacroVars.begin(), _fluidNodeIndicesMacroVars.end());
+}
+
+// Appends the given node indices to fluidNodeIndicesApplyBodyForce; duplicates are not filtered here
+// (sortFluidNodeIndicesApplyBodyForce() removes them).
+void GridImp::addFluidNodeIndicesApplyBodyForce(std::vector<uint> _fluidNodeIndicesApplyBodyForce)
+{
+    // insert() keeps the vector's geometric growth; reserve(exact new size) before every append can force a reallocation on each call.
+    this->fluidNodeIndicesApplyBodyForce.insert(this->fluidNodeIndicesApplyBodyForce.end(), _fluidNodeIndicesApplyBodyForce.begin(), _fluidNodeIndicesApplyBodyForce.end());
+}
+
+// Appends the given node indices to fluidNodeIndicesAllFeatures; duplicates are not filtered here
+// (sortFluidNodeIndicesAllFeatures() removes them).
+void GridImp::addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures)
+{
+    // insert() keeps the vector's geometric growth; reserve(exact new size) before every append can force a reallocation on each call.
+    this->fluidNodeIndicesAllFeatures.insert(this->fluidNodeIndicesAllFeatures.end(), _fluidNodeIndicesAllFeatures.begin(), _fluidNodeIndicesAllFeatures.end());
+}
+
+// Sorts and de-duplicates fluidNodeIndicesMacroVars, drops indices also in fluidNodeIndicesAllFeatures, then removes the rest from fluidNodeIndices.
+// NOTE(review): the std::binary_search on fluidNodeIndicesAllFeatures needs that list to be sorted already - confirm the call order.
+void GridImp::sortFluidNodeIndicesMacroVars()
+{
+    if(this->fluidNodeIndicesMacroVars.empty())
+        return;
+
+    // Sort first: std::unique and the std::binary_search on this list below both rely on sorted input
+    std::sort(this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end());
+    this->fluidNodeIndicesMacroVars.erase( std::unique( this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end() ), this->fluidNodeIndicesMacroVars.end() );
+
+    // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars
+    if(!this->fluidNodeIndicesAllFeatures.empty())
+    {
+        this->fluidNodeIndicesMacroVars.erase( std::remove_if( this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(),
+                                                   [&](auto x){ return std::binary_search(fluidNodeIndicesAllFeatures.begin(), fluidNodeIndicesAllFeatures.end(), x); } ),
+                                               this->fluidNodeIndicesMacroVars.end() );
+    }
+
+    // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
+    this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
+                                      [&](auto x){ return std::binary_search(fluidNodeIndicesMacroVars.begin(), fluidNodeIndicesMacroVars.end(), x); } ),
+                                  this->fluidNodeIndices.end() );
+}
+
+// Sorts and de-duplicates fluidNodeIndicesApplyBodyForce, drops indices also in fluidNodeIndicesAllFeatures, then removes the rest from fluidNodeIndices.
+// NOTE(review): the std::binary_search on fluidNodeIndicesAllFeatures needs that list to be sorted already - confirm the call order.
+void GridImp::sortFluidNodeIndicesApplyBodyForce()
+{
+    if(this->fluidNodeIndicesApplyBodyForce.empty())
+        return;
+
+    // Sort first: std::unique and the std::binary_search on this list below both rely on sorted input
+    std::sort(this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end());
+    this->fluidNodeIndicesApplyBodyForce.erase( std::unique( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end() ), this->fluidNodeIndicesApplyBodyForce.end() );
+
+    // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesApplyBodyForce
+    if(!this->fluidNodeIndicesAllFeatures.empty())
+    {
+        this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
+                                                        [&](auto x){ return std::binary_search(fluidNodeIndicesAllFeatures.begin(), fluidNodeIndicesAllFeatures.end(), x); } ),
+                                                    this->fluidNodeIndicesApplyBodyForce.end() );
+    }
+
+    // Remove indices of fluidNodeIndicesApplyBodyForce from fluidNodeIndices
+    this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
+                                      [&](auto x){ return std::binary_search(fluidNodeIndicesApplyBodyForce.begin(), fluidNodeIndicesApplyBodyForce.end(), x); } ),
+                                  this->fluidNodeIndices.end() );
+}
+
+// Sorts and de-duplicates fluidNodeIndicesAllFeatures, then removes its indices from fluidNodeIndices. No-op if the list is empty.
+void GridImp::sortFluidNodeIndicesAllFeatures()
+{
+    if(this->fluidNodeIndicesAllFeatures.empty()) return;
+
+    // Sort first: std::unique and the std::binary_search below both rely on sorted input
+    std::sort(this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end());
+    this->fluidNodeIndicesAllFeatures.erase( std::unique( this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end() ), this->fluidNodeIndicesAllFeatures.end() );
+
+    // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndices
+    this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
+                                      [&](auto x){ return std::binary_search(fluidNodeIndicesAllFeatures.begin(), fluidNodeIndicesAllFeatures.end(), x); } ),
+                                  this->fluidNodeIndices.end() );
+}
+
+// Number of node indices currently stored in fluidNodeIndicesMacroVars.
+uint GridImp::getNumberOfFluidNodeIndicesMacroVars() const
+{ return static_cast<uint>(this->fluidNodeIndicesMacroVars.size()); }
+
+// Number of node indices currently stored in fluidNodeIndicesApplyBodyForce.
+uint GridImp::getNumberOfFluidNodeIndicesApplyBodyForce() const
+{ return static_cast<uint>(this->fluidNodeIndicesApplyBodyForce.size()); }
+
+// Number of node indices currently stored in fluidNodeIndicesAllFeatures.
+uint GridImp::getNumberOfFluidNodeIndicesAllFeatures() const
+{ return static_cast<uint>(this->fluidNodeIndicesAllFeatures.size()); }
+
+// Copies all entries of fluidNodeIndicesMacroVars, in order, into the caller-provided array;
+// the array must have room for getNumberOfFluidNodeIndicesMacroVars() elements.
+void GridImp::getFluidNodeIndicesMacroVars(uint *_fluidNodeIndicesMacroVars) const
+{ std::copy_n(this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.size(), _fluidNodeIndicesMacroVars); }
+// Copies all entries of fluidNodeIndicesApplyBodyForce, in order, into the caller-provided array;
+// the array must have room for getNumberOfFluidNodeIndicesApplyBodyForce() elements.
+void GridImp::getFluidNodeIndicesApplyBodyForce(uint *_fluidNodeIndicesApplyBodyForce) const
+{ std::copy_n(this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.size(), _fluidNodeIndicesApplyBodyForce); }
+// Copies all entries of fluidNodeIndicesAllFeatures, in order, into the caller-provided array;
+// the array must have room for getNumberOfFluidNodeIndicesAllFeatures() elements.
+void GridImp::getFluidNodeIndicesAllFeatures(uint *_fluidNodeIndicesAllFeatures) const
+{ std::copy_n(this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.size(), _fluidNodeIndicesAllFeatures); }
+
+
+// Returns a copy of bcAlreadySet, the list of sides recorded through addBCalreadySet().
+std::vector<SideType> GridImp::getBCAlreadySet()
+{ return bcAlreadySet; }
+
+// Records `side` by appending it to bcAlreadySet.
+// No duplicate check is performed: adding the same side twice stores it twice.
+void GridImp::addBCalreadySet(SideType side)
+{ bcAlreadySet.push_back(side); }
+
+
 void GridImp::print() const
 {
     printf("min: (%2.4f, %2.4f, %2.4f), max: (%2.4f, %2.4f, %2.4f), size: %d, delta: %2.4f\n", startX, startY, startZ,
@@ -2087,3 +2211,10 @@ void GridImp::print() const
     if(this->gridInterface)
         this->gridInterface->print();
 }
+
+// Returns true if the field entry at `index` is STOPPER_OUT_OF_GRID_BOUNDARY, STOPPER_OUT_OF_GRID or STOPPER_SOLID.
+bool GridImp::isStopperForBC(uint index) const
+{
+    const auto fieldEntry = this->getFieldEntry(index);
+    return fieldEntry == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY || fieldEntry == vf::gpu::STOPPER_OUT_OF_GRID || fieldEntry == vf::gpu::STOPPER_SOLID;
+}
diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h
index edb5ca916bf68dcf992ea214dcddb2dc43810352..2cd322ebed78daaf135ad97b881923ca5831bbcd 100644
--- a/src/gpu/GridGenerator/grid/GridImp.h
+++ b/src/gpu/GridGenerator/grid/GridImp.h
@@ -34,6 +34,7 @@
 #define GRID_IMP_H
 
 #include <array>
+#include <vector>
 
 #include "Core/LbmOrGks.h"
 
@@ -52,6 +53,7 @@ class Object;
 class BoundingBox;
 class TriangularMeshDiscretizationStrategy;
 
+
 #ifdef __GNUC__
     #ifndef __clang__
         #pragma push
@@ -76,7 +78,7 @@ protected:
 
 public:
     static SPtr<GridImp> makeShared(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, std::string d3Qxx, uint level);
-    virtual ~GridImp() = default;
+    ~GridImp() override = default;
 
 private:
     void initalNumberOfNodesAndSize();
@@ -92,6 +94,7 @@ private:
     bool nodeInPreviousCellIs(int index, char type) const;
     bool nodeInCellIs(Cell& cell, char type) const override;
 
+
     uint getXIndex(real x) const;
     uint getYIndex(real y) const;
     uint getZIndex(real z) const;
@@ -115,8 +118,11 @@ private:
 
     int *sparseIndices;
 
-    std::vector<uint> fluidNodeIndices;
-    std::vector<uint> fluidNodeIndicesBorder;
+    std::vector<uint> fluidNodeIndices;                 // run on CollisionTemplate::Default
+    std::vector<uint> fluidNodeIndicesBorder;           // run on subdomain border nodes (CollisionTemplate::SubDomainBorder)
+    std::vector<uint> fluidNodeIndicesMacroVars;        // run on CollisionTemplate::MacroVars
+    std::vector<uint> fluidNodeIndicesApplyBodyForce;   // run on CollisionTemplate::ApplyBodyForce
+    std::vector<uint> fluidNodeIndicesAllFeatures;      // run on CollisionTemplate::AllFeatures
 
 	uint *qIndices;     //maps from matrix index to qIndex
 	real *qValues;
@@ -132,6 +138,8 @@ private:
 
     bool enableFixRefinementIntoTheWall;
 
+    std::vector<SideType> bcAlreadySet;
+
 protected:
     Field field;
     int *neighborIndexX, *neighborIndexY, *neighborIndexZ, *neighborIndexNegative;
@@ -146,9 +154,9 @@ public:
     void setPeriodicityY(bool periodicity) override;
     void setPeriodicityZ(bool periodicity) override;
 
-    bool getPeriodicityX() override;
-    bool getPeriodicityY() override;
-    bool getPeriodicityZ() override;
+    bool getPeriodicityX() const override;
+    bool getPeriodicityY() const override;
+    bool getPeriodicityZ() const override;
 
     void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall) override;
 
@@ -182,6 +190,9 @@ public:
 
     void setNumberOfLayers(uint numberOfLayers) override;
 
+    std::vector<SideType> getBCAlreadySet() override;
+    void addBCalreadySet(SideType side) override;
+
 public:
     Distribution distribution;
 
@@ -216,6 +227,7 @@ public:
     bool nodeInNextCellIs(int index, char type) const;
     bool hasAllNeighbors(uint index) const;
     bool hasNeighborOfType(uint index, char type) const;
+    bool nodeHasBC(uint index) const override;
     bool cellContainsOnly(Cell &cell, char type) const;
     bool cellContainsOnly(Cell &cell, char typeA, char typeB) const;
 
@@ -256,6 +268,8 @@ public:
     static void getGridInterface(uint *gridInterfaceList, const uint *oldGridInterfaceList, uint size);
 
     bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const override;
+    
+    bool isStopperForBC(uint index) const override;
 
     int *getNeighborsX() const override;
     int* getNeighborsY() const override;
@@ -273,7 +287,7 @@ public:
     void print() const;
 
 public:
-    virtual void findSparseIndices(SPtr<Grid> fineGrid) override;
+    void findSparseIndices(SPtr<Grid> fineGrid) override;
 
     void findForGridInterfaceNewIndices(SPtr<GridImp> fineGrid);
     void updateSparseIndices();
@@ -364,6 +378,19 @@ public:
     uint getNumberOfFluidNodesBorder() const override;
     void getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const override;
 
+    void addFluidNodeIndicesMacroVars(std::vector<uint> _fluidNodeIndicesMacroVars) override;
+    void addFluidNodeIndicesApplyBodyForce(std::vector<uint> _fluidNodeIndicesApplyBodyForce) override;
+    void addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures) override;
+    void sortFluidNodeIndicesMacroVars() override;
+    void sortFluidNodeIndicesApplyBodyForce() override;
+    void sortFluidNodeIndicesAllFeatures() override;
+
+    uint getNumberOfFluidNodeIndicesMacroVars() const override;
+    uint getNumberOfFluidNodeIndicesApplyBodyForce() const override;
+    uint getNumberOfFluidNodeIndicesAllFeatures() const override; 
+    void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars) const override;
+    void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce) const override;
+    void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures) const override;
 
 public:
     struct CommunicationIndices {
diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
index 23fb0f4e7f3e16702e9cb2459606986af1032e49..0238434dc87b453dc21164577d8abd4ce1819793 100644
--- a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
+++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
@@ -417,10 +417,10 @@ void SimulationFileWriter::writeGridInterfaceToFile(SPtr<GridBuilder> builder, u
     }
 }
 
-void SimulationFileWriter::writeGridInterfaceToFile(const uint numberOfNodes, std::ofstream& coarseFile, uint* coarse, std::ofstream& fineFile, uint* fine)
+void SimulationFileWriter::writeGridInterfaceToFile(uint numberOfNodes, std::ofstream &coarseFile, uint *coarse,
+                                                    std::ofstream &fineFile, uint *fine)
 {
-    for (uint index = 0; index < numberOfNodes; index++)
-    {
+    for (uint index = 0; index < numberOfNodes; index++) {
         coarseFile << coarse[index] << " \n";
         fineFile << fine[index] << " \n";
     }
@@ -428,17 +428,15 @@ void SimulationFileWriter::writeGridInterfaceToFile(const uint numberOfNodes, st
     fineFile << "\n";
 }
 
-void SimulationFileWriter::writeGridInterfaceOffsetToFile(uint numberOfNodes, std::ofstream & offsetFile, real* offset_X, real* offset_Y, real* offset_Z)
+void SimulationFileWriter::writeGridInterfaceOffsetToFile(uint numberOfNodes, std::ofstream &offsetFile, real *offset_X,
+                                                          real *offset_Y, real *offset_Z)
 {
-    for (uint index = 0; index < numberOfNodes; index++)
-    {
+    for (uint index = 0; index < numberOfNodes; index++) {
         offsetFile << offset_X[index] << " " << offset_Y[index] << " " << offset_Z[index] << " \n";
     }
     offsetFile << "\n";
 }
 
-
-
 /*#################################################################################*/
 /*---------------------------------private methods---------------------------------*/
 /*---------------------------------------------------------------------------------*/
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index 759528e5346ba8d9899cb90eb64503b20a44c4fc..ed647cb406bca23ef90667b7d17171c7b3f46283 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -8,7 +8,7 @@ if(MSVC)
     set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix
 endif()
 
-vf_add_library(PUBLIC_LINK basics lbmCuda PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX vf_cuda)
+vf_add_library(PUBLIC_LINK basics lbm PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX vf_cuda)
 
 #SET(TPN_WIN32 "/EHsc")
 #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp
index f8f5c42b835a1a4ba55e378e624230bbb43dc05a..e3f344231dc9d5e19c09f7ce1fde7d31f1770232 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp
@@ -17,11 +17,11 @@ void alloc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void init2ndMoments(Parameter* para)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//init host arrays
-		for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
 			para->getParH(lev)->kxyFromfcNEQ[pos]    = 0.0;
 			para->getParH(lev)->kyzFromfcNEQ[pos]    = 0.0;
@@ -116,7 +116,7 @@ void init3rdMoments(Parameter* para)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//init host arrays
-		for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
 			para->getParH(lev)->CUMbbb[pos] = 0.0;
 			para->getParH(lev)->CUMabc[pos] = 0.0;
@@ -198,7 +198,7 @@ void calc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//allocation (device-memory + host-memory)
@@ -211,11 +211,11 @@ void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManag
 
 void initHigherOrderMoments(Parameter* para)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//init host arrays
-		for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
 			para->getParH(lev)->CUMcbb[pos] = 0.0;
 			para->getParH(lev)->CUMbcb[pos] = 0.0;
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp
index 77db571f7f10e0ea0bff827400270dd074d4e666..80a667f91976b745b619fed5d5763b5429a6559c 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp
@@ -11,16 +11,16 @@
 
 void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		cudaMemoryManager->cudaAllocMedianOut(lev);
-		for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = (real)0.0;
-			para->getParH(lev)->press_SP_Med_Out[i] = (real)0.0;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = (real)0.0;
+			para->getParH(lev)->press_SP_Med_Out[pos] = (real)0.0;
 		}
 	}
 }
@@ -31,15 +31,15 @@ void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void calcMedian(Parameter* para, uint tdiff)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
-		for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = para->getParH(lev)->vx_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = para->getParH(lev)->vy_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = para->getParH(lev)->vz_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = para->getParH(lev)->rho_SP_Med[i]  / (real)tdiff;
-			para->getParH(lev)->press_SP_Med_Out[i] = para->getParH(lev)->press_SP_Med[i]/ (real)tdiff;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = para->getParH(lev)->vx_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = para->getParH(lev)->vy_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = para->getParH(lev)->vz_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = para->getParH(lev)->rho_SP_Med[pos]  / (real)tdiff;
+			para->getParH(lev)->press_SP_Med_Out[pos] = para->getParH(lev)->press_SP_Med[pos]/ (real)tdiff;
 		}
 	}
 }
@@ -75,14 +75,14 @@ void allocMedianAD(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
         cudaMemoryManager->cudaAllocMedianOutAD(lev);
-		for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = (real)0.0;
-			para->getParH(lev)->press_SP_Med_Out[i] = (real)0.0;
-			para->getParH(lev)->Conc_Med_Out[i]     = (real)0.0;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = (real)0.0;
+			para->getParH(lev)->press_SP_Med_Out[pos] = (real)0.0;
+			para->getParH(lev)->Conc_Med_Out[pos]     = (real)0.0;
 		}
 	}
 }
@@ -95,14 +95,14 @@ void calcMedianAD(Parameter* para, uint tdiff)
 {
 	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
-		for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = para->getParH(lev)->vx_SP_Med[i]    / (real)tdiff;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = para->getParH(lev)->vy_SP_Med[i]    / (real)tdiff;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = para->getParH(lev)->vz_SP_Med[i]    / (real)tdiff;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = para->getParH(lev)->rho_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->press_SP_Med_Out[i] = para->getParH(lev)->press_SP_Med[i] / (real)tdiff;
-			para->getParH(lev)->Conc_Med_Out[i]     = para->getParH(lev)->Conc_Med[i]     / (real)tdiff;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = para->getParH(lev)->vx_SP_Med[pos]    / (real)tdiff;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = para->getParH(lev)->vy_SP_Med[pos]    / (real)tdiff;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = para->getParH(lev)->vz_SP_Med[pos]    / (real)tdiff;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = para->getParH(lev)->rho_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->press_SP_Med_Out[pos] = para->getParH(lev)->press_SP_Med[pos] / (real)tdiff;
+			para->getParH(lev)->Conc_Med_Out[pos]     = para->getParH(lev)->Conc_Med[pos]     / (real)tdiff;
 		}
 	}
 }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
index e91fb6f5c232bd98073a1c930149693f8af4b078..9572252965e1c619702370f8b9a3756bf035035e 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
@@ -25,32 +25,32 @@ void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaMemoryM
     for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
         cudaMemoryManager->cudaCopyTurbulenceIntensityDH(lev, para->getParH(lev)->numberOfNodes);
 
-        for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++) {
+        for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) {
             // mean velocity
-            para->getParH(lev)->vx_mean[i] = para->getParH(lev)->vx_mean[i] / (real)tdiff;
-            para->getParH(lev)->vy_mean[i] = para->getParH(lev)->vy_mean[i] / (real)tdiff;
-            para->getParH(lev)->vz_mean[i] = para->getParH(lev)->vz_mean[i] / (real)tdiff;
+            para->getParH(lev)->vx_mean[pos] = para->getParH(lev)->vx_mean[pos] / (real)tdiff;
+            para->getParH(lev)->vy_mean[pos] = para->getParH(lev)->vy_mean[pos] / (real)tdiff;
+            para->getParH(lev)->vz_mean[pos] = para->getParH(lev)->vz_mean[pos] / (real)tdiff;
 
             // fluctuations
-            para->getParH(lev)->vxx[i] = para->getParH(lev)->vxx[i] / (real)tdiff;
-            para->getParH(lev)->vyy[i] = para->getParH(lev)->vyy[i] / (real)tdiff;
-            para->getParH(lev)->vzz[i] = para->getParH(lev)->vzz[i] / (real)tdiff;
-            para->getParH(lev)->vxy[i] = para->getParH(lev)->vxy[i] / (real)tdiff;
-            para->getParH(lev)->vxz[i] = para->getParH(lev)->vxz[i] / (real)tdiff;
-            para->getParH(lev)->vyz[i] = para->getParH(lev)->vyz[i] / (real)tdiff;
-
-            para->getParH(lev)->vxx[i] =
-                para->getParH(lev)->vxx[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vx_mean[i];
-            para->getParH(lev)->vyy[i] =
-                para->getParH(lev)->vyy[i] - para->getParH(lev)->vy_mean[i] * para->getParH(lev)->vy_mean[i];
-            para->getParH(lev)->vzz[i] =
-                para->getParH(lev)->vzz[i] - para->getParH(lev)->vz_mean[i] * para->getParH(lev)->vz_mean[i];
-            para->getParH(lev)->vxy[i] =
-                para->getParH(lev)->vxy[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vy_mean[i];
-            para->getParH(lev)->vxz[i] =
-                para->getParH(lev)->vxz[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vz_mean[i];
-            para->getParH(lev)->vyz[i] =
-                para->getParH(lev)->vyz[i] - para->getParH(lev)->vy_mean[i] * para->getParH(lev)->vz_mean[i];
+            para->getParH(lev)->vxx[pos] = para->getParH(lev)->vxx[pos] / (real)tdiff;
+            para->getParH(lev)->vyy[pos] = para->getParH(lev)->vyy[pos] / (real)tdiff;
+            para->getParH(lev)->vzz[pos] = para->getParH(lev)->vzz[pos] / (real)tdiff;
+            para->getParH(lev)->vxy[pos] = para->getParH(lev)->vxy[pos] / (real)tdiff;
+            para->getParH(lev)->vxz[pos] = para->getParH(lev)->vxz[pos] / (real)tdiff;
+            para->getParH(lev)->vyz[pos] = para->getParH(lev)->vyz[pos] / (real)tdiff;
+
+            para->getParH(lev)->vxx[pos] =
+                para->getParH(lev)->vxx[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vx_mean[pos];
+            para->getParH(lev)->vyy[pos] =
+                para->getParH(lev)->vyy[pos] - para->getParH(lev)->vy_mean[pos] * para->getParH(lev)->vy_mean[pos];
+            para->getParH(lev)->vzz[pos] =
+                para->getParH(lev)->vzz[pos] - para->getParH(lev)->vz_mean[pos] * para->getParH(lev)->vz_mean[pos];
+            para->getParH(lev)->vxy[pos] =
+                para->getParH(lev)->vxy[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vy_mean[pos];
+            para->getParH(lev)->vxz[pos] =
+                para->getParH(lev)->vxz[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vz_mean[pos];
+            para->getParH(lev)->vyz[pos] =
+                para->getParH(lev)->vyz[pos] - para->getParH(lev)->vy_mean[pos] * para->getParH(lev)->vz_mean[pos];
         }
     }
 }
@@ -146,7 +146,7 @@ void writeAllTiDatafToFile(Parameter *para, uint timestep)
     }
 }
 
-void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data,
+void writeTiStuffToFile(Parameter *para, uint timestep, unsigned long long sizeOfTiArray, std::vector<real *> &data,
                         std::vector<std::string> &datanames)
 {
     ////////////////////////////////////////////////////////////////////////
@@ -169,10 +169,10 @@ void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::
     ostr << std::endl;
     ////////////////////////////////////////////////////////////////////////
     // fill file with data
-    for (int i = 0; i < sizeOfTiArray; i++) {
-        ostr << i;
+    for (size_t pos = 0; pos < sizeOfTiArray; pos++) {
+        ostr << pos;
         for (auto dataset : data)
-            ostr << "\t" << dataset[i];
+            ostr << "\t" << dataset[pos];
         ostr << std::endl;
     }
     ////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
index f70973eb5921a17c3229a026623de2a0ef9f3ce4..a76c2d0dde99ad9fb3fd38137b6c72e5c3f5a6c3 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
@@ -18,7 +18,7 @@ void writeVeloFluctuationToFile(Parameter *para, uint timeste);
 void writeVeloMeansToFile(Parameter *para, uint timestep);
 void writeAllTiDatafToFile(Parameter *para, uint timestep);
 
-void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data,
+void writeTiStuffToFile(Parameter *para, uint timestep, unsigned long long sizeOfTiArray, std::vector<real *> &data,
                   std::vector<std::string> &datanames);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
index 4a14d19c10936f84379f332ef24f081f0ebb0cb7..49543f37df7fb54290f4ab6c09edb8d10c0b67be 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
@@ -39,8 +39,14 @@ void CollisionAndExchange_noStreams_indexKernel::operator()(UpdateGrid27 *update
     //!
     //! 1. run collision
     //!
-    updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndices,
-                                    para->getParD(level)->numberOfFluidNodes, -1);
+    for( CollisionTemplate tag: para->getParH(level)->allocatedBulkFluidNodeTags )
+    {
+        updateGrid->collisionUsingIndices(  level, t, 
+                                            para->getParD(level)->taggedFluidNodeIndices[tag],
+                                            para->getParD(level)->numberOfTaggedFluidNodes[tag],
+                                            tag,
+                                            CudaStreamIndex::Legacy);
+    }
 
     //! 2. exchange information between GPUs
     updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false);
@@ -61,28 +67,35 @@ void CollisionAndExchange_noStreams_oldKernel::operator()(UpdateGrid27 *updateGr
 
 void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t)
 {
-    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
-    int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex();
-
     //! \details steps:
     //!
-    //! 1. run collision for nodes which are at the border of the gpus/processes
-    //!
-    updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndicesBorder,
-                                    para->getParD(level)->numberOfFluidNodesBorder, borderStreamIndex);
+    //! 1. run collision for nodes which are at the border of the gpus/processes, running with WriteMacroVars in case probes sample on these nodes
+    //!    
+    updateGrid->collisionUsingIndices(  level, t, 
+                                        para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder],
+                                        para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder], 
+                                        CollisionTemplate::WriteMacroVars,  
+                                        CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
-    updateGrid->prepareExchangeMultiGPU(level, borderStreamIndex);
+    updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
     if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
-
-    //! 3. launch the collision kernel for bulk nodes
-    //!
-    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
-    updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndices,
-                                    para->getParD(level)->numberOfFluidNodes, bulkStreamIndex);
-
+        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder);
+
+    //! 3. launch the collision kernel for bulk nodes. This includes nodes tagged Default, WriteMacroVars, ApplyBodyForce,
+    //!    or AllFeatures. All assigned tags are listed in \c allocatedBulkFluidNodeTags during initialization in Simulation::init
+
+    para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk);
+    
+    for( CollisionTemplate tag: para->getParH(level)->allocatedBulkFluidNodeTags )
+    {
+        updateGrid->collisionUsingIndices(  level, t, 
+                                            para->getParD(level)->taggedFluidNodeIndices[tag],
+                                            para->getParD(level)->numberOfTaggedFluidNodes[tag], 
+                                            tag,
+                                            CudaStreamIndex::Bulk);
+    }
     //! 4. exchange information between GPUs
-    updateGrid->exchangeMultiGPU(level, borderStreamIndex);
+    updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
index d62e8fee24dad1cde7ccd2044a5a5f9573f7ff82..cc1d2eb748b01835b46f5fc69f47ed3ddc17a28d 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
@@ -53,7 +53,7 @@ void ForceCalculations::calcPIDControllerForForce(Parameter* para, CudaMemoryMan
 	 {
 		 //////////////////////////////////////////////////////////////////////
 		 //measure the velocity
-		 int numberOfElements = para->getParH(lev)->numberOfNodes;
+		 unsigned long long numberOfElements = para->getParH(lev)->numberOfNodes;
 		 if (numberOfElements > 0)
 		 {
 			 CalcMacCompSP27(para->getParD(lev)->velocityX,
@@ -74,11 +74,11 @@ void ForceCalculations::calcPIDControllerForForce(Parameter* para, CudaMemoryMan
 			 cudaMemoryManager->cudaCopyPrint(lev);
 //			 para->cudaCopyForceVelo(i,numberOfElements);
 			 //////////////////////////////////////////////////////////////////
-			 for (int j = 0; j < numberOfElements; j++)
+			 for (size_t pos = 0; pos < numberOfElements; pos++)
 			 {
-				 tempVeloX += (double)para->getParH(lev)->velocityX[j];
-				 tempVeloY += (double)para->getParH(lev)->velocityY[j];
-				 tempVeloZ += (double)para->getParH(lev)->velocityZ[j];
+				 tempVeloX += (double)para->getParH(lev)->velocityX[pos];
+				 tempVeloY += (double)para->getParH(lev)->velocityY[pos];
+				 tempVeloZ += (double)para->getParH(lev)->velocityZ[pos];
 			 }
 			 tempVeloX /= (double)numberOfElements;
 			 tempVeloY /= (double)numberOfElements;
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
index cd74216e1fbe7b718c72046ace4b7d2e7cf451fe..b8ca4e9c2020e17cd0192267ac5d931b510afc3a 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
@@ -38,67 +38,62 @@ void NoRefinement::operator()(UpdateGrid27 *updateGrid, Parameter *para, int lev
 
 void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
 {
-    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
-    int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex();
-
     //! \details steps:
     //!
     //! 1. Interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, borderStreamIndex);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
-    updateGrid->prepareExchangeMultiGPUAfterFtoC(level, borderStreamIndex);
+    updateGrid->prepareExchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder);
     if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
+        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder);
 
     //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
     //!
-    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, bulkStreamIndex);
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, bulkStreamIndex);
+    para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::Bulk);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::Bulk);
 
     //! 4. exchange information between GPUs (only nodes which are part of the interpolation)
     //!
-    updateGrid->exchangeMultiGPUAfterFtoC(level, borderStreamIndex);
+    updateGrid->exchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder);
 
-    // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes
+    //! 5. interpolation coarse to fine for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, borderStreamIndex);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder);
 
     cudaDeviceSynchronize();
 }
 
-void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level){
-    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
-    int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex();
-
+void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
+{
     //! \details steps:
     //!
     //! 1. interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, borderStreamIndex);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
-    updateGrid->prepareExchangeMultiGPU(level, borderStreamIndex);
+    updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
     if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
+        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder);
 
     //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
     //!
-    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, bulkStreamIndex);
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, bulkStreamIndex);
+    para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::Bulk);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::Bulk);
 
     //! 4. exchange information between GPUs (all nodes)
     //!
-    updateGrid->exchangeMultiGPU(level, borderStreamIndex);
+    updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
 
-    // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes
+    //! 5. interpolation coarse to fine for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, borderStreamIndex);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder);
 
     cudaDeviceSynchronize();
 }
@@ -109,14 +104,14 @@ void RefinementAndExchange_noStreams_exchangeInterface::operator()(UpdateGrid27
     //!
     //! 1. interpolation fine to coarse
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, -1);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy);
 
     //! 2. exchange information between GPUs (only nodes which are part of the interpolation)
     //!
     updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, true);
 
     //! 3. interpolation coarse to fine
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, -1);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy);
 }
 
 void RefinementAndExchange_noStreams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
@@ -125,14 +120,14 @@ void RefinementAndExchange_noStreams_exchangeAllNodes::operator()(UpdateGrid27 *
     //!
     //! 1. interpolation fine to coarse
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, -1);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy);
 
     //! 2. exchange information between GPUs (all nodes)
     //!
     updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false);
 
     //! 3. interpolation coarse to fine
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, -1);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy);
 }
 
 void Refinement_noExchange::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
@@ -141,7 +136,7 @@ void Refinement_noExchange::operator()(UpdateGrid27 *updateGrid, Parameter *para
     //!
     //! 1. interpolation fine to coarse
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, -1);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy);
     //! 2. interpolation coarse to fine
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, -1);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 296ab819c5538a6b6d6a6827b5c28cbc475af838..4136614dfbfc9e0d2fc1bf7f4b01624f94eabb6f 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -22,13 +22,17 @@ void UpdateGrid27::updateGrid(int level, unsigned int t)
         updateGrid(level + 1, t);
     }
 
+    //////////////////////////////////////////////////////////////////////////
+    
+    interactWithProbes(level, t);
+
     //////////////////////////////////////////////////////////////////////////
 
     collision(this, para.get(), level, t);
 
     //////////////////////////////////////////////////////////////////////////
 
-    postCollisionBC(level);
+    postCollisionBC(level, t);
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -47,13 +51,14 @@ void UpdateGrid27::updateGrid(int level, unsigned int t)
 
     //////////////////////////////////////////////////////////////////////////
     if( level != para->getFine() )
-    {
+    {   
         refinement(this, para.get(), level);
     }
 
+    //////////////////////////////////////////////////////////////////////////
+    
     interactWithActuators(level, t);
 
-    interactWithProbes(level, t);
 }
 
 void UpdateGrid27::collisionAllNodes(int level, unsigned int t)
@@ -71,15 +76,16 @@ void UpdateGrid27::collisionAllNodes(int level, unsigned int t)
         collisionAdvectionDiffusion(level);
 }
 
-void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices, uint numberOfFluidNodes, int stream)
+void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *taggedFluidNodeIndices, uint numberOfTaggedFluidNodes, CollisionTemplate collisionTemplate, CudaStreamIndex stream)
 {
-    if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0)
-        kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, stream);
+    if (taggedFluidNodeIndices != nullptr && numberOfTaggedFluidNodes != 0)
+        kernels.at(level)->runOnIndices(taggedFluidNodeIndices, numberOfTaggedFluidNodes, collisionTemplate, stream);
     else
-        std::cout << "In collision: fluidNodeIndices or numberOfFluidNodes not definded"
+        std::cout << "In collision: fluidNodeIndices or numberOfFluidNodes not defined"
                       << std::endl;
 
     //////////////////////////////////////////////////////////////////////////
+    //! \todo: AD collision and porousMedia should be called separately, not in collisionUsingIndices
 
     if (para->getSimulatePorousMedia())
         collisionPorousMedia(level);
@@ -118,21 +124,21 @@ void UpdateGrid27::collisionAdvectionDiffusion(int level)
     this->adKernelManager->runADcollisionKernel(level);
 }
 
-void UpdateGrid27::prepareExchangeMultiGPU(int level, int streamIndex)
+void UpdateGrid27::prepareExchangeMultiGPU(int level, CudaStreamIndex streamIndex)
 {
     prepareExchangeCollDataXGPU27AllNodes(para.get(), level, streamIndex);
     prepareExchangeCollDataYGPU27AllNodes(para.get(), level, streamIndex);
     prepareExchangeCollDataZGPU27AllNodes(para.get(), level, streamIndex);
 }
 
-void UpdateGrid27::prepareExchangeMultiGPUAfterFtoC(int level, int streamIndex)
+void UpdateGrid27::prepareExchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex)
 {
     prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, streamIndex);
     prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, streamIndex);
     prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, streamIndex);
 }
 
-void UpdateGrid27::exchangeMultiGPU(int level, int streamIndex)
+void UpdateGrid27::exchangeMultiGPU(int level, CudaStreamIndex streamIndex)
 {
     //////////////////////////////////////////////////////////////////////////
     // 3D domain decomposition
@@ -168,30 +174,30 @@ void UpdateGrid27::exchangeMultiGPU_noStreams_withPrepare(int level, bool useRed
     // 3D domain decomposition
     if (useReducedComm) {
         // X
-        prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, -1);
-        exchangeCollDataXGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1);
-        scatterNodesFromRecvBufferXGPU27AfterFtoC(para.get(), level, -1);
+        prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy);
+        exchangeCollDataXGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy);
+        scatterNodesFromRecvBufferXGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy);
         // Y
-        prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, -1);
-        exchangeCollDataYGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1);
-        scatterNodesFromRecvBufferYGPU27AfterFtoC(para.get(), level, -1);
+        prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy);
+        exchangeCollDataYGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy);
+        scatterNodesFromRecvBufferYGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy);
         // Z
-        prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, -1);
-        exchangeCollDataZGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1);
-        scatterNodesFromRecvBufferZGPU27AfterFtoC(para.get(), level, -1);
+        prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy);
+        exchangeCollDataZGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy);
+        scatterNodesFromRecvBufferZGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy);
     } else {
         // X
-        prepareExchangeCollDataXGPU27AllNodes(para.get(), level, -1);
-        exchangeCollDataXGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1);
-        scatterNodesFromRecvBufferXGPU27AllNodes(para.get(), level, -1);
+        prepareExchangeCollDataXGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy);
+        exchangeCollDataXGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy);
+        scatterNodesFromRecvBufferXGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy);
         // Y
-        prepareExchangeCollDataYGPU27AllNodes(para.get(), level, -1);
-        exchangeCollDataYGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1);
-        scatterNodesFromRecvBufferYGPU27AllNodes(para.get(), level, -1);
+        prepareExchangeCollDataYGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy);
+        exchangeCollDataYGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy);
+        scatterNodesFromRecvBufferYGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy);
         // Z
-        prepareExchangeCollDataZGPU27AllNodes(para.get(), level, -1);
-        exchangeCollDataZGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1);
-        scatterNodesFromRecvBufferZGPU27AllNodes(para.get(), level, -1);
+        prepareExchangeCollDataZGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy);
+        exchangeCollDataZGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy);
+        scatterNodesFromRecvBufferZGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy);
     }
 
     //////////////////////////////////////////////////////////////////////////
@@ -204,7 +210,7 @@ void UpdateGrid27::exchangeMultiGPU_noStreams_withPrepare(int level, bool useRed
         exchangePostCollDataADZGPU27(para.get(), comm, cudaMemoryManager.get(), level);
     }
 }
-void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, int streamIndex)
+void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex)
 {
     //////////////////////////////////////////////////////////////////////////
     // 3D domain decomposition
@@ -227,9 +233,10 @@ void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, int streamIndex)
     }
 }
 
-void UpdateGrid27::postCollisionBC(int level)
+void UpdateGrid27::postCollisionBC(int level, unsigned int t)
 {
     //////////////////////////////////////////////////////////////////////////
+    // G E O M E T R Y
     // V E L O C I T Y (I N F L O W)
     this->bcKernelManager->runVelocityBCKernelPost(level);
 
@@ -257,6 +264,10 @@ void UpdateGrid27::postCollisionBC(int level)
     // P R E S S U R E
     this->bcKernelManager->runPressureBCKernelPost(level);
 
+    //////////////////////////////////////////////////////////////////////////
+    // P R E C U R S O R
+    this->bcKernelManager->runPrecursorBCKernelPost(level, t, cudaMemoryManager.get());
+
     //////////////////////////////////////////////////////////////////////////
     // A D V E C T I O N    D I F F U S I O N
     if (para->getDiffOn())
@@ -317,13 +328,12 @@ void UpdateGrid27::preCollisionBC(int level, unsigned int t)
     //////////////////////////////////////////////////////////////////////////////////
 }
 
-void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC,
-                                int streamIndex)
+void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, CudaStreamIndex streamIndex)
 {
     gridScalingKernelManager->runFineToCoarseKernelLB(level, icellFC, offFC, streamIndex);
 
     if (para->getDiffOn()) {
-        if (streamIndex != -1) {
+        if (para->getStreamManager()->streamIsRegistered(streamIndex)) {
             printf("fineToCoarse Advection Diffusion not implemented"); // TODO
             return;
         }
@@ -331,14 +341,13 @@ void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &
     }
 }
 
-void UpdateGrid27::coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF,
-                                int streamIndex)
+void UpdateGrid27::coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex)
 {
     this->gridScalingKernelManager->runCoarseToFineKernelLB(level, icellCF, offCF, streamIndex);
 
     if (para->getDiffOn())
     {
-        if (streamIndex != -1){
+        if (para->getStreamManager()->streamIsRegistered(streamIndex)) {
             printf("CoarseToFineWithStream Advection Diffusion not implemented"); // TODO
             return;
         }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 8110923bf066412e2bb09ffa1f10efe3ddc983c7..8ce2cf5bfd72f9f53cdb35bc92502ee9ca0d3ad8 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -4,6 +4,7 @@
 #include "LBM/LB.h"
 #include "GPU/GPU_Interface.h"
 #include "Parameter/Parameter.h"
+#include "Parameter/CudaStreamManager.h"
 #include "GPU/CudaMemoryManager.h"
 #include "Communication/Communicator.h"
 #include "Calculation/PorousMedia.h"
@@ -15,7 +16,6 @@ class Kernel;
 class BoundaryConditionFactory;
 class GridScalingFactory;
 class TurbulenceModelFactory;
-
 class UpdateGrid27;
 using CollisionStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level, unsigned int t)>;
 using RefinementStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level)>;
@@ -31,21 +31,21 @@ public:
 
 private:
     void collisionAllNodes(int level, unsigned int t);
-    void collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1);
+    void collisionUsingIndices(int level, unsigned int t, uint *taggedFluidNodeIndices = nullptr, uint numberOfTaggedFluidNodes = 0, CollisionTemplate collisionTemplate = CollisionTemplate::Default, CudaStreamIndex streamIndex=CudaStreamIndex::Legacy);
     void collisionAdvectionDiffusion(int level);
 
-    void postCollisionBC(int level);
+    void postCollisionBC(int level, unsigned int t);
     void preCollisionBC(int level, unsigned int t);
     void collisionPorousMedia(int level);
 
-    void fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, int streamIndex);
-    void coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, int streamIndex);
+    void fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, CudaStreamIndex streamIndex);
+    void coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex);
 
-    void prepareExchangeMultiGPU(int level, int streamIndex);
-    void prepareExchangeMultiGPUAfterFtoC(int level, int streamIndex);
+    void prepareExchangeMultiGPU(int level, CudaStreamIndex streamIndex);
+    void prepareExchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex);
 
-    void exchangeMultiGPU(int level, int streamIndex);
-    void exchangeMultiGPUAfterFtoC(int level, int streamIndex);
+    void exchangeMultiGPU(int level, CudaStreamIndex streamIndex);
+    void exchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex);
     void exchangeMultiGPU_noStreams_withPrepare(int level, bool useReducedComm);
 
     void swapBetweenEvenAndOddTimestep(int level);
@@ -60,6 +60,7 @@ private:
     friend class CollisionAndExchange_noStreams_indexKernel;
     friend class CollisionAndExchange_noStreams_oldKernel;
     friend class CollisionAndExchange_streams;
+    friend class CollisionAndExchange_noStreams_withReadWriteFlags;
 
     RefinementStrategy refinement;
     friend class RefinementAndExchange_streams_exchangeInterface;
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 36c250401e0775b3abcc7d25c0f89fde0556631e..00a7b45668e2050467f3d1122455dc74d0ad4f1c 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -11,12 +11,12 @@ using namespace vf::lbm::dir;
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition: functions used by all directions
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
+void collectNodesInSendBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex,
                                  std::vector<ProcessNeighbor27> *sendProcessNeighbor,
                                  unsigned int numberOfSendProcessNeighbors)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
-
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
+    
     for (unsigned int i = 0; i < numberOfSendProcessNeighbors; i++) {
         GetSendFsPostDev27(para->getParD(level)->distributions.f[0], 
                            (*sendProcessNeighbor)[i].f[0],
@@ -32,11 +32,11 @@ void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
     }
 }
 
-void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
+void scatterNodesFromRecvBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex,
                                    std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                    unsigned int numberOfRecvProcessNeighbors)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
     for (unsigned int i = 0; i < numberOfRecvProcessNeighbors; i++) {
         SetRecvFsPostDev27(para->getParD(level)->distributions.f[0], 
                            (*recvProcessNeighborDev)[i].f[0],
@@ -105,22 +105,22 @@ void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeN
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborX,
                                 (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
 }
 
-void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCX,
                                 (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
 }
 
 void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                    int level, int streamIndex)
+                                    int level, CudaStreamIndex streamIndex)
 {
-    exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex, 
+    exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex,
                            &para->getParD(level)->sendProcessNeighborX,
                            &para->getParD(level)->recvProcessNeighborX,
                            &para->getParH(level)->sendProcessNeighborX,
@@ -128,40 +128,40 @@ void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm
 }
 
 void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                     int level, int streamIndex)
+                                     int level, CudaStreamIndex streamIndex)
 {
-    exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex, 
+    exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex,
                            &para->getParD(level)->sendProcessNeighborsAfterFtoCX,
                            &para->getParD(level)->recvProcessNeighborsAfterFtoCX,
                            &para->getParH(level)->sendProcessNeighborsAfterFtoCX,
                            &para->getParH(level)->recvProcessNeighborsAfterFtoCX);
 }
 
-void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex)
+void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
-    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborX,
+    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborX,
                                   (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
 }
 
-void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
+void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
-    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCX,
+    scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCX,
                                   (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
 }
 
-void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level,
-                            int streamIndex, 
+void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, 
+                            int level, CudaStreamIndex streamIndex,
                             std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborHost)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //! \details steps: 
     //! 1. copy data from device to host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-        cudaMemoryManager->cudaCopyProcessNeighborXFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex);
+        cudaMemoryManager->cudaCopyProcessNeighborXFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs);
 
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //! 2. start non-blocking receive (MPI)
@@ -181,7 +181,7 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //! 7. copy received data from host to device
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-        cudaMemoryManager->cudaCopyProcessNeighborXFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex);
+        cudaMemoryManager->cudaCopyProcessNeighborXFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -189,22 +189,22 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborY,
                                 (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
 }
 
-void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCY,
                                 (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
 }
 
 void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                    int level, int streamIndex)
+                                    int level, CudaStreamIndex streamIndex)
 {
-    exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex, 
+    exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex,
                            &para->getParD(level)->sendProcessNeighborY,
                            &para->getParD(level)->recvProcessNeighborY, 
                            &para->getParH(level)->sendProcessNeighborY,
@@ -212,38 +212,39 @@ void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm
 }
 
 void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                     int level, int streamIndex)
+                                     int level, CudaStreamIndex streamIndex)
 {
-    exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex, 
+    exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex,
                            &para->getParD(level)->sendProcessNeighborsAfterFtoCY,
                            &para->getParD(level)->recvProcessNeighborsAfterFtoCY, 
                            &para->getParH(level)->sendProcessNeighborsAfterFtoCY,
                            &para->getParH(level)->recvProcessNeighborsAfterFtoCY);
 }
 
-void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex)
+void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborY,
                                   (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
 }
 
-void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
+void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCY,
                                   (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
 }
 
 void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level,
-                            int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
+                            CudaStreamIndex streamIndex,
+                            std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborHost)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-        cudaMemoryManager->cudaCopyProcessNeighborYFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex);
+        cudaMemoryManager->cudaCopyProcessNeighborYFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs);
 
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     startNonBlockingMpiReceive((unsigned int)(*sendProcessNeighborHost).size(), comm, recvProcessNeighborHost);
@@ -276,7 +277,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // copy Host to Device
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) {
-        cudaMemoryManager->cudaCopyProcessNeighborYFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex);
+        cudaMemoryManager->cudaCopyProcessNeighborYFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
@@ -285,61 +286,62 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborZ,
                                 (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
 }
 
-void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCZ,
                                 (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
 }
 
 void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                    int level, int streamIndex)
+                                    int level, CudaStreamIndex streamIndex)
 {
-    exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex, 
+    exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex,
                            &para->getParD(level)->sendProcessNeighborZ,
                            &para->getParD(level)->recvProcessNeighborZ, 
                            &para->getParH(level)->sendProcessNeighborZ,
                            &para->getParH(level)->recvProcessNeighborZ);
 }
 void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                     int level, int streamIndex)
+                                     int level, CudaStreamIndex streamIndex)
 {
-    exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex, 
+    exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex,
                            &para->getParD(level)->sendProcessNeighborsAfterFtoCZ,
                            &para->getParD(level)->recvProcessNeighborsAfterFtoCZ, 
                            &para->getParH(level)->sendProcessNeighborsAfterFtoCZ,
                            &para->getParH(level)->recvProcessNeighborsAfterFtoCZ);
 }
 
-void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex)
+void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborZ,
                                   (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
 }
 
-void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex)
+void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
 {
     scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCZ,
                                   (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level,
-                            int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
+void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level, 
+                            CudaStreamIndex streamIndex,
+                            std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborHost)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-        cudaMemoryManager->cudaCopyProcessNeighborZFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex);
+        cudaMemoryManager->cudaCopyProcessNeighborZFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     startNonBlockingMpiReceive((unsigned int)(*sendProcessNeighborHost).size(), comm, recvProcessNeighborHost);
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -386,7 +388,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
     // copy Host to Device
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        cudaMemoryManager->cudaCopyProcessNeighborZFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex);
+        cudaMemoryManager->cudaCopyProcessNeighborZFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index ec930ebbc06554e948204b74e79e0e25b85f57b5..8302ffdc47bfa012c47df00f90c2491039f4eaee 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -6,6 +6,7 @@
 #include "GPU/GPU_Interface.h"
 #include "LBM/LB.h"
 #include "Parameter/Parameter.h"
+#include "Parameter/CudaStreamManager.h"
 
 //! \file ExchangeData27.h
 //! \ingroup GPU
@@ -14,9 +15,9 @@
 
 //////////////////////////////////////////////////////////////////////////
 // 1D domain decomposition
-void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, 
                                          int level);
-void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, 
                                           int level);
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition
@@ -24,13 +25,13 @@ void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, Cud
 // functions used for all directions
 
 //! \brief Collect the send nodes in a buffer on the gpu
-void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
-                                            std::vector<ProcessNeighbor27> *sendProcessNeighbor,
-                                            unsigned int numberOfSendProcessNeighbors);
+void collectNodesInSendBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex,
+                                 std::vector<ProcessNeighbor27> *sendProcessNeighbor,
+                                 unsigned int numberOfSendProcessNeighbors);
 //! \brief Distribute the receive nodes from the buffer on the gpu
-void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
-                                              std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
-                                              unsigned int numberOfRecvProcessNeighbors);
+void scatterNodesFromRecvBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex,
+                                   std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
+                                   unsigned int numberOfRecvProcessNeighbors);
 //! \brief Copy nodes which are part of the communication in multiple directions
 //! \details The nodes are copied from the receive buffer in one direction to the send buffer in another direction. The
 //! copy operation is conducted on the cpu. 
@@ -49,21 +50,20 @@ void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeN
 
 //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Needed to exchange all nodes, used in the communication after collision step
-void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
 //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
 //! See [master thesis of Anna Wellmann]
-void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 //! \brief Exchange routine in x direction for simulations on multiple gpus
 //! \details Send and receive the nodes from the communication buffers on the gpus.
 //! \param Communicator is needed for the communication between the processes with mpi
 //! \param CudaMemoryManager is needed for moving the data between host and device
-//! \param streamIndex is the index of a CUDA Stream, which is needed for communication hiding
 //! \param sendProcessNeighborDev, recvProcessNeighborDev, sendProcessNeighborHost, recvProcessNeighborHost are pointers
 //! to the send and receive arrays, both on the device and the host
 void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                       int level, int streamIndex,
+                                       int level, CudaStreamIndex streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
@@ -71,59 +71,59 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
 //! \brief Calls exchangeCollDataXGPU27() for exchanging all nodes
 //! \details Used in the communication after collision step
 void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
-                                               CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
+                                               CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 //! \brief Calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two
 //! interpolation processes on refined grids 
 //! \details Only exchange nodes which are part of the interpolation process on
 //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and
 //! before the interpolation coarse to fine. See [master thesis of Anna Wellmann]
 void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
-                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
+                                                CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Needed to exchange all nodes, used in the communication after collision step
-void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
 //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
 //! See [master thesis of Anna Wellmann]
-void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
 // y
 
-void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex);
-void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
+void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 
 void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                       int level, int streamIndex,
+                                       int level, CudaStreamIndex streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
-                                       std::vector<ProcessNeighbor27> *recvProcessNeighborHos);
+                                       std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
 void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
-                                               CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
+                                               CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
-                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
-void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex);
-void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+                                                CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
+void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
+void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 
 // z
-void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex);
-void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
+void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 
 void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
-                                       int level, int streamIndex,
+                                       int level, CudaStreamIndex streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
 void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
-                                               CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
+                                               CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
-                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
+                                                CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 
-void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex);
-void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
+void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition convection diffusion
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index e197fb5c28611e77406b30ab39aa6af2f54b9ef5..3b511264e9c7edc80bbe367cac4a9b6d8725674b 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -19,34 +19,29 @@ std::shared_ptr<GridProvider> GridProvider::makeGridReader(FILEFORMAT format, st
     return std::shared_ptr<GridProvider>(new GridReader(format, para, cudaMemoryManager));
 }
 
-void GridProvider::setNumberOfNodes(const int numberOfNodes, const int level) const
+void GridProvider::setNumberOfNodes(uint numberOfNodes, int level) const
 {
-    para->getParH(level)->numberOfNodes = numberOfNodes;
-    para->getParD(level)->numberOfNodes = numberOfNodes;
-    para->getParH(level)->mem_size_real_SP = sizeof(real) * para->getParH(level)->numberOfNodes;
-    para->getParH(level)->mem_size_int_SP = sizeof(uint) * para->getParH(level)->numberOfNodes;
-    para->getParD(level)->mem_size_real_SP = sizeof(real) * para->getParD(level)->numberOfNodes;
-    para->getParD(level)->mem_size_int_SP = sizeof(uint) * para->getParD(level)->numberOfNodes;
+    para->getParH(level)->numberOfNodes          = (unsigned long long)numberOfNodes;
+    para->getParD(level)->numberOfNodes          = (unsigned long long)numberOfNodes;
+    para->getParH(level)->memSizeRealLBnodes     = sizeof(real) * para->getParH(level)->numberOfNodes;
+    para->getParD(level)->memSizeRealLBnodes     = sizeof(real) * para->getParD(level)->numberOfNodes;
+    para->getParH(level)->memSizeLonglongLBnodes = sizeof(unsigned long long) * para->getParH(level)->numberOfNodes;
+    para->getParD(level)->memSizeLonglongLBnodes = sizeof(unsigned long long) * para->getParD(level)->numberOfNodes;
 }
 
-void GridProvider::setNumberOfFluidNodes(const int numberOfNodes, const int level) const
+void GridProvider::setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemplate tag, int level) const
 {
-    para->getParH(level)->numberOfFluidNodes = numberOfNodes;
-    para->getParD(level)->numberOfFluidNodes = numberOfNodes;
+    para->getParH(level)->numberOfTaggedFluidNodes[tag] = numberOfNodes;
+    para->getParD(level)->numberOfTaggedFluidNodes[tag] = numberOfNodes;
 }
 
-void GridProvider::setNumberOfFluidNodesBorder(const int numberOfNodes, const int level) const {
-    para->getParH(level)->numberOfFluidNodesBorder = numberOfNodes;
-    para->getParD(level)->numberOfFluidNodesBorder = numberOfNodes;
-}
-
-void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level) const
+void GridProvider::setInitalNodeValues(uint numberOfNodes, int level) const
 {
-    for (int j = 1; j <= numberOfNodes; j++)
+    for (uint pos = 1; pos <= numberOfNodes; pos++)
     {
-        const real coordX = para->getParH(level)->coordinateX[j];
-        const real coordY = para->getParH(level)->coordinateY[j];
-        const real coordZ = para->getParH(level)->coordinateZ[j];
+        const real coordX = para->getParH(level)->coordinateX[pos];
+        const real coordY = para->getParH(level)->coordinateY[pos];
+        const real coordZ = para->getParH(level)->coordinateZ[pos];
 
         real rho, vx, vy, vz;
 
@@ -63,40 +58,40 @@ void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level)
             vz  = real(0.0);
         }
 
-        para->getParH(level)->rho[j] = rho; 
-        para->getParH(level)->velocityX[j]  = vx; 
-        para->getParH(level)->velocityY[j]  = vy;
-        para->getParH(level)->velocityZ[j]  = vz; 
+        para->getParH(level)->rho[pos] = rho; 
+        para->getParH(level)->velocityX[pos]  = vx; 
+        para->getParH(level)->velocityY[pos]  = vy;
+        para->getParH(level)->velocityZ[pos]  = vz; 
 
         //////////////////////////////////////////////////////////////////////////
 
         if (para->getCalcMedian()) {
-            para->getParH(level)->vx_SP_Med[j] = 0.0f;
-            para->getParH(level)->vy_SP_Med[j] = 0.0f;
-            para->getParH(level)->vz_SP_Med[j] = 0.0f;
-            para->getParH(level)->rho_SP_Med[j] = 0.0f;
-            para->getParH(level)->press_SP_Med[j] = 0.0f;
+            para->getParH(level)->vx_SP_Med[pos] = 0.0f;
+            para->getParH(level)->vy_SP_Med[pos] = 0.0f;
+            para->getParH(level)->vz_SP_Med[pos] = 0.0f;
+            para->getParH(level)->rho_SP_Med[pos] = 0.0f;
+            para->getParH(level)->press_SP_Med[pos] = 0.0f;
         }
         if (para->getUseWale()) {
-            para->getParH(level)->turbViscosity[j] = 0.0f;
+            para->getParH(level)->turbViscosity[pos] = 0.0f;
             //Debug
-            para->getParH(level)->gSij[j] = 0.0f;
-            para->getParH(level)->gSDij[j] = 0.0f;
-            para->getParH(level)->gDxvx[j] = 0.0f;
-            para->getParH(level)->gDyvx[j] = 0.0f;
-            para->getParH(level)->gDzvx[j] = 0.0f;
-            para->getParH(level)->gDxvy[j] = 0.0f;
-            para->getParH(level)->gDyvy[j] = 0.0f;
-            para->getParH(level)->gDzvy[j] = 0.0f;
-            para->getParH(level)->gDxvz[j] = 0.0f;
-            para->getParH(level)->gDyvz[j] = 0.0f;
-            para->getParH(level)->gDzvz[j] = 0.0f;
+            para->getParH(level)->gSij[pos] = 0.0f;
+            para->getParH(level)->gSDij[pos] = 0.0f;
+            para->getParH(level)->gDxvx[pos] = 0.0f;
+            para->getParH(level)->gDyvx[pos] = 0.0f;
+            para->getParH(level)->gDzvx[pos] = 0.0f;
+            para->getParH(level)->gDxvy[pos] = 0.0f;
+            para->getParH(level)->gDyvy[pos] = 0.0f;
+            para->getParH(level)->gDzvy[pos] = 0.0f;
+            para->getParH(level)->gDxvz[pos] = 0.0f;
+            para->getParH(level)->gDyvz[pos] = 0.0f;
+            para->getParH(level)->gDzvz[pos] = 0.0f;
         }
 
         if (para->getIsBodyForce()) {
-            para->getParH(level)->forceX_SP[j] = 0.0f;
-            para->getParH(level)->forceY_SP[j] = 0.0f;
-            para->getParH(level)->forceZ_SP[j] = 0.0f;
+            para->getParH(level)->forceX_SP[pos] = 0.0f;
+            para->getParH(level)->forceY_SP[pos] = 0.0f;
+            para->getParH(level)->forceZ_SP[pos] = 0.0f;
         }
     }
 
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
index 5fc5826735643ec748da169160e782004d7e5fb7..007db1e0d8e27b3810aa38c089bae8069bbe5813 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
@@ -5,7 +5,7 @@
 #include <vector>
 #include <memory>
 
-
+#include "LBM/LB.h"
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
 #include "gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
@@ -24,34 +24,35 @@ public:
     static std::shared_ptr<GridProvider> makeGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator);
     static std::shared_ptr<GridProvider> makeGridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
 
-	virtual void allocArrays_CoordNeighborGeo() = 0;
-	virtual void allocArrays_BoundaryValues() = 0;
-	virtual void allocArrays_BoundaryQs() = 0;
+    virtual void allocArrays_CoordNeighborGeo() = 0;
+    virtual void allocArrays_BoundaryValues() = 0;
+    virtual void allocArrays_BoundaryQs() = 0;
     virtual void allocArrays_OffsetScale() = 0;
-    virtual void allocArrays_fluidNodeIndices() = 0;
-    virtual void allocArrays_fluidNodeIndicesBorder() = 0;
+    virtual void allocArrays_taggedFluidNodes() = 0;
+
+    virtual void tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) = 0;
+    virtual void sortFluidNodeTags() = 0;
 
-	virtual void setDimensions() = 0;
-	virtual void setBoundingBox() = 0;
-	virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) = 0;
+    virtual void setDimensions() = 0;
+    virtual void setBoundingBox() = 0;
+    virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) = 0;
 
     virtual void allocAndCopyForcing();
     virtual void allocAndCopyQuadricLimiters();
     virtual void freeMemoryOnHost();
     virtual void cudaCopyDataToHost(int level);
 
-	virtual ~GridProvider() = default;
+    virtual ~GridProvider() = default;
     virtual void initalGridInformations() = 0;
 
 protected:
-	void setNumberOfNodes(const int numberOfNodes, const int level) const;
-    void setNumberOfFluidNodes(const int numberOfNodes, const int level) const;
-    void setNumberOfFluidNodesBorder(const int numberOfNodes, const int level) const;
-    virtual void setInitalNodeValues(const int numberOfNodes, const int level) const;
-
-	void setPressSizePerLevel(int level, int sizePerLevel) const;
-	void setVelocitySizePerLevel(int level, int sizePerLevel) const;
-	void setOutflowSizePerLevel(int level, int sizePerLevel) const;
+    void setNumberOfNodes(uint numberOfNodes, int level) const;
+    void setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemplate tag, int level) const;
+    virtual void setInitalNodeValues(uint numberOfNodes, int level) const;
+
+    void setPressSizePerLevel(int level, int sizePerLevel) const;
+    void setVelocitySizePerLevel(int level, int sizePerLevel) const;
+    void setOutflowSizePerLevel(int level, int sizePerLevel) const;
 
     std::shared_ptr<Parameter> para;
     std::shared_ptr<CudaMemoryManager> cudaMemoryManager;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
index fa432a1d9c3922b88e93588548db74083275ef1e..a1c8554cc4e262e9f1eca4204aed4ffcfd4c3a87 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
@@ -47,7 +47,7 @@ bool GridReader::getBinaer()
 
 void rearrangeGeometry(Parameter* para, int lev)
 {
-    for (uint index = 0; index < para->getParH(lev)->numberOfNodes; index++)
+    for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++)
     {
         if (para->getParH(lev)->typeOfGridNode[index] == GEO_FLUID_OLD)
         {
@@ -74,11 +74,11 @@ void GridReader::allocArrays_CoordNeighborGeo()
 	uint numberOfNodesGlobal = 0;
 	std::cout << "Number of Nodes: " << std::endl;
 
-	for (uint level = 0; level <= maxLevel; level++) 
-	{		
-		int numberOfNodesPerLevel = coordX.getSize(level) + 1;
-		numberOfNodesGlobal += numberOfNodesPerLevel;
-		std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
+    for (uint level = 0; level <= maxLevel; level++)
+    {
+        const uint numberOfNodesPerLevel = coordX.getSize(level) + 1;
+        numberOfNodesGlobal += numberOfNodesPerLevel;
+        std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
 
 		setNumberOfNodes(numberOfNodesPerLevel, level);
 
@@ -130,9 +130,9 @@ void GridReader::allocArrays_BoundaryValues()
 
     for (uint i = 0; i < channelBoundaryConditions.size(); i++)
     {
-        if (     this->channelBoundaryConditions[i] == "velocity") { fillVelocityVectors(i); } 
-		else if (this->channelBoundaryConditions[i] == "pressure") { setPressureValues(i); } 
-		else if (this->channelBoundaryConditions[i] == "outflow")  { setOutflowValues(i);  }
+        if (     this->channelBoundaryConditions[i] == "velocity") { fillVelocityVectors(i); }
+        else if (this->channelBoundaryConditions[i] == "pressure") { setPressureValues(i); }
+        else if (this->channelBoundaryConditions[i] == "outflow")  { setOutflowValues(i);  }
     }
 
 	setVelocityValues();
@@ -218,16 +218,20 @@ void GridReader::allocArrays_OffsetScale()
     std::cout << "-----Ende OffsetScale------" << std::endl;
 }
 
-void GridReader::allocArrays_fluidNodeIndices() {
-    std::cout << "GridReader::allocArrays_fluidNodeIndices not implemented" << std::endl;
-	// TODO
+void GridReader::allocArrays_taggedFluidNodes() {
+    std::cout << "GridReader::allocArrays_taggedFluidNodes not implemented" << std::endl;
+    // TODO: stub - only reports that tagged fluid nodes are not implemented for GridReader.
 }
 
-void GridReader::allocArrays_fluidNodeIndicesBorder() {
-    std::cout << "GridReader::allocArrays_fluidNodeIndicesBorder not implemented" << std::endl;
+void GridReader::tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level){
+    std::cout << "GridReader::tagFluidNodeIndices not implemented" << std::endl;
     // TODO
 }
 
+void GridReader::sortFluidNodeTags(){
+    std::cout << "GridReader::sortFluidNodeTags not implemented" << std::endl;
+    // TODO
+}
 
 void GridReader::setPressureValues(int channelSide) const
 {
@@ -281,23 +285,23 @@ void GridReader::fillVelocityVectors(int channelSide)
 			delete[] veloX_ValuesPerSide;
             delete[] veloY_ValuesPerSide;
             delete[] veloZ_ValuesPerSide;
-        }        
-	}
+        }
+    }
 
 
 }
 
-void GridReader::setVelocityValues() { 
+void GridReader::setVelocityValues() {
     for (int level = 0; level < (int)(velocityX_BCvalues.size()); level++) {
-        
-		int sizePerLevel = (int) velocityX_BCvalues[level].size();
+
+        int sizePerLevel = (int) velocityX_BCvalues[level].size();
         std::cout << "complete size velocity level " << level << " : " << sizePerLevel << std::endl;
         setVelocitySizePerLevel(level, sizePerLevel);
-        
-		if (sizePerLevel > 1) {
+
+        if (sizePerLevel > 1) {
             cudaMemoryManager->cudaAllocVeloBC(level);
             setVelocity(level, sizePerLevel);
-			cudaMemoryManager->cudaCopyVeloBC(level);
+            cudaMemoryManager->cudaCopyVeloBC(level);
         }
     }
 }
@@ -668,8 +672,8 @@ void GridReader::modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned
 /*------------------------------------------------------------------------------------------------*/
 /*---------------------------------------private q methods----------------------------------------*/
 /*------------------------------------------------------------------------------------------------*/
-void GridReader::initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index, 
-										std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const
+void GridReader::initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index,
+                                        std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const
 {
     boundaryQ->setValuesInVector(Qs, level);
     boundaryQ->setIndexInVector(index, level);
@@ -685,7 +689,7 @@ void GridReader::copyVectorsToQStruct(std::vector<std::vector<real>> &Qs,
 
 	for (int direction = 0; direction < para->getD3Qxx(); direction++) {
         for (size_t indexQ = 0; indexQ < sizeOfValues; indexQ++) {
-            qTemp.q27[direction][indexQ] = Qs[direction][indexQ]; 
+            qTemp.q27[direction][indexQ] = Qs[direction][indexQ];
         }
     }
 
@@ -847,46 +851,46 @@ void GridReader::setBoundingBox()
 
 void GridReader::initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex,  std::string boundaryCondition)
 {
-	std::vector<unsigned int>neighVec;
-	std::vector<unsigned int>indexVec;
-	
-	int counter = 0;
-
-	for(unsigned int i=0; i<neighX->getLevel();i++) {
-		if(boundaryCondition =="periodic_y"){
-			neighVec = neighY->getVec(i);
-		} 
-		else if(boundaryCondition =="periodic_x"){
-			neighVec = neighX->getVec(i);
-		}
-		else if(boundaryCondition =="periodic_z"){
-			neighVec = neighZ->getVec(i);
-		}
-		else {
-			std::cout << "wrong String in periodicValue" << std::endl;
-			exit(1);
-		}
+    std::vector<unsigned int>neighVec;
+    std::vector<unsigned int>indexVec;
 
-		for (std::vector<unsigned int>::iterator it = periodIndex[i].begin(); it != periodIndex[i].end(); it++) {
-			if(periodV[i][0][counter] != 0) {
-				neighVec[*it]=periodV[i][0][counter];
-			}
+    int counter = 0;
 
-			counter++;
-		}
+    for(unsigned int i=0; i<neighX->getLevel();i++) {
+        if(boundaryCondition =="periodic_y"){
+            neighVec = neighY->getVec(i);
+        }
+        else if(boundaryCondition =="periodic_x"){
+            neighVec = neighX->getVec(i);
+        }
+        else if(boundaryCondition =="periodic_z"){
+            neighVec = neighZ->getVec(i);
+        }
+        else {
+            std::cout << "wrong String in periodicValue" << std::endl;
+            exit(1);
+        }
 
+        for (std::vector<unsigned int>::iterator it = periodIndex[i].begin(); it != periodIndex[i].end(); it++) {
+            if(periodV[i][0][counter] != 0) {
+                neighVec[*it]=periodV[i][0][counter];
+            }
 
-		if(boundaryCondition =="periodic_y"){
-			neighY->setVec(i, neighVec);
-		} 
-		else if(boundaryCondition =="periodic_x"){
-			neighX->setVec(i, neighVec);
-		}
-		else if(boundaryCondition =="periodic_z"){
-			neighZ->setVec(i, neighVec);
-		}
+            counter++;
+        }
 
-	}
+
+        if(boundaryCondition =="periodic_y"){
+            neighY->setVec(i, neighVec);
+        }
+        else if(boundaryCondition =="periodic_x"){
+            neighX->setVec(i, neighVec);
+        }
+        else if(boundaryCondition =="periodic_z"){
+            neighZ->setVec(i, neighVec);
+        }
+
+    }
 }
 
 void GridReader::makeReader(std::shared_ptr<Parameter> para)
@@ -917,9 +921,9 @@ void GridReader::makeReader(std::vector<std::shared_ptr<BoundaryQs> > &BC_Qs, st
 
 void GridReader::setChannelBoundaryCondition()
 {
-	for (std::size_t i = 0; i < channelDirections.size(); i++)
-	{
-		this->channelBoundaryConditions[i] = BC_Values[i]->getBoundaryCondition();
-		std::cout << this->channelDirections[i] << " Boundary: " << channelBoundaryConditions[i] << std::endl;
-	}
-}
\ No newline at end of file
+    for (std::size_t i = 0; i < channelDirections.size(); i++)
+    {
+        this->channelBoundaryConditions[i] = BC_Values[i]->getBoundaryCondition();
+        std::cout << this->channelDirections[i] << " Boundary: " << channelBoundaryConditions[i] << std::endl;
+    }
+}
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
index 18efb6a7885191312ea4e2fbb22eb45162ab1de1..041d2c3ce94592f792c5a850eebd14c07f4db1b4 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
@@ -3,9 +3,9 @@
 
 #include "../GridProvider.h"
 
-#include <vector>
-#include <string>
 #include <memory>
+#include <string>
+#include <vector>
 
 #include "LBM/LB.h"
 
@@ -16,15 +16,14 @@ class BoundaryValues;
 class BoundaryQs;
 class CoordNeighborGeoV;
 
-class VIRTUALFLUIDS_GPU_EXPORT GridReader
-	: public GridProvider
+class VIRTUALFLUIDS_GPU_EXPORT GridReader : public GridProvider
 {
 private:
-	bool binaer;
-	std::vector<std::string> channelDirections;
-	std::vector<std::string> channelBoundaryConditions;
-	std::shared_ptr<CoordNeighborGeoV> neighX, neighY, neighZ, neighWSB;
-	std::vector<std::shared_ptr<BoundaryValues> > BC_Values;
+    bool binaer;
+    std::vector<std::string> channelDirections;
+    std::vector<std::string> channelBoundaryConditions;
+    std::shared_ptr<CoordNeighborGeoV> neighX, neighY, neighZ, neighWSB;
+    std::vector<std::shared_ptr<BoundaryValues>> BC_Values;
 
     std::vector<std::vector<real>> velocityX_BCvalues, velocityY_BCvalues, velocityZ_BCvalues;
     std::vector<std::vector<std::vector<real>>> velocityQs;
@@ -34,57 +33,62 @@ private:
     std::vector<std::vector<real>> outflowBCvalues;
 
 public:
-	GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
-    ~GridReader();
-	void allocArrays_CoordNeighborGeo() override;
-	void allocArrays_BoundaryValues() override;
+    GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para,
+               std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
+    ~GridReader() override;
+    void allocArrays_CoordNeighborGeo() override;
+    void allocArrays_BoundaryValues() override;
     void allocArrays_OffsetScale() override;
-    void allocArrays_fluidNodeIndices() override;
-    void allocArrays_fluidNodeIndicesBorder() override;
+    void allocArrays_taggedFluidNodes() override;
 
-	void initalValuesDomainDecompostion(int level);
+    void tagFluidNodeIndices(const std::vector<uint> &taggedFluidNodeIndices, CollisionTemplate tag, uint level) override;
 
-	void setChannelBoundaryCondition();
+    void sortFluidNodeTags() override;
 
-	void allocArrays_BoundaryQs() override;
-	bool getBinaer();
-	void setDimensions() override;
-	void setBoundingBox() override;
-	void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) override;
+    void initalValuesDomainDecompostion(int level);
+
+    void setChannelBoundaryCondition();
+
+    void allocArrays_BoundaryQs() override;
+    bool getBinaer();
+    void setDimensions() override;
+    void setBoundingBox() override;
+    void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int>>> periodV,
+                           std::vector<std::vector<unsigned int>> periodIndex, std::string way) override;
 
 private:
-	void makeReader(std::shared_ptr<Parameter> para);
-	void makeReader(std::vector<std::shared_ptr<BoundaryQs> > &BC_Qs, std::shared_ptr<Parameter> para);
+    void makeReader(std::shared_ptr<Parameter> para);
+    void makeReader(std::vector<std::shared_ptr<BoundaryQs>> &BC_Qs, std::shared_ptr<Parameter> para);
 
-	void setPressureValues(int channelSide) const;
-	void setPressRhoBC(int sizePerLevel, int level, int channelSide) const;
+    void setPressureValues(int channelSide) const;
+    void setPressRhoBC(int sizePerLevel, int level, int channelSide) const;
 
-	void fillVelocityVectors(int channelSide);
+    void fillVelocityVectors(int channelSide);
     void setVelocityValues();
-	void setVelocity(int level, int sizePerLevel) const;
+    void setVelocity(int level, int sizePerLevel) const;
 
-	void setOutflowValues(int channelSide) const;
-	void setOutflow(int level, int sizePerLevel, int channelSide) const;
+    void setOutflowValues(int channelSide) const;
+    void setOutflow(int level, int sizePerLevel, int channelSide) const;
 
-
-	//void fillVelocityQVectors(int channelSide);
+    // void fillVelocityQVectors(int channelSide);
     void setPressQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void setVelocityQs(std::shared_ptr<BoundaryQs> boundaryQ);
-	void setOutflowQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void setNoSlipQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void setGeoQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setVelocityQs(std::shared_ptr<BoundaryQs> boundaryQ);
+    void setOutflowQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
+    void setNoSlipQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
+    void setGeoQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
+    void modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
 
-	void initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index,
+    void initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index,
                                 std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
     void copyVectorsToQStruct(std::vector<std::vector<real>> &Qs, std::vector<int> &index,
                               QforBoundaryConditions &Q) const;
     void initalQStruct(QforBoundaryConditions &Q, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void printQSize(std::string bc, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void setSizeGeoQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void setQ27Size(QforBoundaryConditions &Q, real* QQ, unsigned int sizeQ) const;
-	bool hasQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void printQSize(std::string bc, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setSizeGeoQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setQ27Size(QforBoundaryConditions &Q, real *QQ, unsigned int sizeQ) const;
+    bool hasQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+
 public:
     void initalGridInformations() override;
 };
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index 7f61b4357276f38d8fde71489dcf60348b402941..38a7eef7e356e2f2da4c1a819d8375035a37313a 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -1,5 +1,6 @@
 #include "GridGenerator.h"
 
+#include "LBM/LB.h"
 #include "Parameter/Parameter.h"
 #include "GridGenerator/grid/GridBuilder/GridBuilder.h"
 #include "GPU/CudaMemoryManager.h"
@@ -10,19 +11,24 @@
 #include <algorithm>
 #include "utilities/math/Math.h"
 #include "Output/QDebugWriter.hpp"
+#include "GridGenerator/TransientBCSetter/TransientBCSetter.h"
 
 #include "utilities/communication.h"
 #include "Communication/Communicator.h"
 
+#include <logger/Logger.h>
+
 using namespace vf::lbm::dir;
 
-GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator):
-    mpiProcessID(communicator.getPID()), builder(builder)
+GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para,
+                             std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator &communicator)
+    : mpiProcessID(communicator.getPID()), builder(builder)
 {
     this->para = para;
     this->cudaMemoryManager = cudaMemoryManager;
     this->indexRearrangement = std::make_unique<IndexRearrangementForStreams>(para, builder, communicator);
-    this->interpolationGrouper = std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder);
+    this->interpolationGrouper =
+        std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder);
 }
 
 GridGenerator::~GridGenerator() = default;
@@ -55,15 +61,15 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
     std::cout << "Number of Level: " << numberOfLevels << std::endl;
     int numberOfNodesGlobal = 0;
     std::cout << "Number of Nodes: " << std::endl;
-    
-    for (uint level = 0; level < numberOfLevels; level++) 
+
+    for (uint level = 0; level < numberOfLevels; level++)
     {
-        const int numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1;
+        const uint numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1;
         numberOfNodesGlobal += numberOfNodesPerLevel;
         std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
-    
+
         setNumberOfNodes(numberOfNodesPerLevel, level);
-    
+
         cudaMemoryManager->cudaAllocCoord(level);
         cudaMemoryManager->cudaAllocSP(level);
         //cudaMemoryManager->cudaAllocF3SP(level);
@@ -71,7 +77,7 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
 
         if(para->getUseTurbulentViscosity())
             cudaMemoryManager->cudaAllocTurbulentViscosity(level);
-        
+
         if(para->getIsBodyForce())
             cudaMemoryManager->cudaAllocBodyForce(level);
 
@@ -100,28 +106,104 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
     std::cout << "-----finish Coord, Neighbor, Geo------" << std::endl;
 }
 
-void GridGenerator::allocArrays_fluidNodeIndices() {
-    for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
-        setNumberOfFluidNodes(builder->getNumberOfFluidNodes(level), level);
-        cudaMemoryManager->cudaAllocFluidNodeIndices(level);
-        builder->getFluidNodeIndices(para->getParH(level)->fluidNodeIndices, level);
-        cudaMemoryManager->cudaCopyFluidNodeIndices(level);
-    }    
+void GridGenerator::allocArrays_taggedFluidNodes() {
+
+    for (uint level = 0; level < builder->getNumberOfGridLevels(); level++)
+    {
+        for ( CollisionTemplate tag: all_CollisionTemplate )
+        {   //TODO: Need to add CollisionTemplate to GridBuilder to allow it as an argument and get rid of the individual get functions for fluid node indices... and clean up this mess
+            switch(tag)
+            {
+                case CollisionTemplate::Default:
+                    this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodes(level), CollisionTemplate::Default, level);
+                    cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::Default, level);
+                    builder->getFluidNodeIndices(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::Default], level);
+                    cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::Default, level);
+                    if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0)
+                        para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag);
+                    break;
+                case CollisionTemplate::SubDomainBorder:
+                    this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesBorder(level), CollisionTemplate::SubDomainBorder, level);
+                    cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level);
+                    builder->getFluidNodeIndicesBorder(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder], level);
+                    cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level);
+                    break;
+                case CollisionTemplate::WriteMacroVars:
+                    this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesMacroVars(level), CollisionTemplate::WriteMacroVars, level);
+                    cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::WriteMacroVars, level);
+                    builder->getFluidNodeIndicesMacroVars(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::WriteMacroVars], level);
+                    cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::WriteMacroVars, level);
+                    if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0)
+                        para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag);
+                    break;
+                case CollisionTemplate::ApplyBodyForce:
+                    this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesApplyBodyForce(level), CollisionTemplate::ApplyBodyForce, level);
+                    cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::ApplyBodyForce, level);
+                    builder->getFluidNodeIndicesApplyBodyForce(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::ApplyBodyForce], level);
+                    cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::ApplyBodyForce, level);
+                    if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0)
+                        para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag);
+                    break;
+                case CollisionTemplate::AllFeatures:
+                    this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesAllFeatures(level), CollisionTemplate::AllFeatures, level);
+                    cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::AllFeatures, level);
+                    builder->getFluidNodeIndicesAllFeatures(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::AllFeatures], level);
+                    cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::AllFeatures, level);
+                    if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0)
+                        para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag);
+                    break;
+                default:
+                    break;
+            }
+        }
+        VF_LOG_INFO("Number of tagged nodes on level {}:", level);
+        VF_LOG_INFO("Default: {}, Border: {}, WriteMacroVars: {}, ApplyBodyForce: {}, AllFeatures: {}",
+                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default],
+                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder],
+                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::WriteMacroVars],
+                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::ApplyBodyForce],
+                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::AllFeatures]    );
+    }
 }
 
-void GridGenerator::allocArrays_fluidNodeIndicesBorder() {
-    for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
-        setNumberOfFluidNodesBorder(builder->getNumberOfFluidNodesBorder(level), level);
-        cudaMemoryManager->cudaAllocFluidNodeIndicesBorder(level);
-        builder->getFluidNodeIndicesBorder(para->getParH(level)->fluidNodeIndicesBorder, level);
-        cudaMemoryManager->cudaCopyFluidNodeIndicesBorder(level);
+void GridGenerator::tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) {
+    switch(tag)
+    {
+        case CollisionTemplate::WriteMacroVars:
+            builder->addFluidNodeIndicesMacroVars( taggedFluidNodeIndices, level );
+            break;
+        case CollisionTemplate::ApplyBodyForce:
+            builder->addFluidNodeIndicesApplyBodyForce( taggedFluidNodeIndices, level );
+            break;
+        case CollisionTemplate::AllFeatures:
+            builder->addFluidNodeIndicesAllFeatures( taggedFluidNodeIndices, level );
+            break;
+        case CollisionTemplate::Default:
+        case CollisionTemplate::SubDomainBorder:
+            throw std::runtime_error("Cannot tag fluid nodes as Default or SubDomainBorder!");
+        default:
+            throw std::runtime_error("Tagging fluid nodes with invald tag!");
+            break;
+
     }
+
+}
+
+void GridGenerator::sortFluidNodeTags() {
+    VF_LOG_INFO("Start sorting tagged fluid nodes...");
+    for (uint level = 0; level < builder->getNumberOfGridLevels(); level++)
+    {
+        builder->sortFluidNodeIndicesAllFeatures(level); //has to be called first!
+        builder->sortFluidNodeIndicesMacroVars(level);
+        builder->sortFluidNodeIndicesApplyBodyForce(level);
+    }
+    VF_LOG_INFO("done.");
 }
 
 void GridGenerator::allocArrays_BoundaryValues()
 {
     std::cout << "------read BoundaryValues------" << std::endl;
-    int blocks = 0;
+    int blocks;
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfPressureValues = int(builder->getPressureSize(level));
@@ -129,6 +211,7 @@ void GridGenerator::allocArrays_BoundaryValues()
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->pressureBC.numberOfBCnodes = 0;
+        para->getParD(level)->outflowPressureCorrectionFactor = para->getOutflowPressureCorrectionFactor();
         if (numberOfPressureValues > 1)
         {
             blocks = (numberOfPressureValues / para->getParH(level)->numberofthreads) + 1;
@@ -148,12 +231,12 @@ void GridGenerator::allocArrays_BoundaryValues()
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->slipBC.numberOfBCnodes = 0;
-        if (numberOfSlipValues > 1)
-        {
+        if (numberOfSlipValues > 1) {
             blocks = (numberOfSlipValues / para->getParH(level)->numberofthreads) + 1;
             para->getParH(level)->slipBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             cudaMemoryManager->cudaAllocSlipBC(level);
-            builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY, para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level);
+            builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY,
+                                   para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level);
             cudaMemoryManager->cudaCopySlipBC(level);
         }
         para->getParD(level)->slipBC.numberOfBCnodes = para->getParH(level)->slipBC.numberOfBCnodes;
@@ -173,11 +256,11 @@ void GridGenerator::allocArrays_BoundaryValues()
             para->getParH(level)->stressBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             cudaMemoryManager->cudaAllocStressBC(level);
             cudaMemoryManager->cudaAllocWallModel(level, para->getHasWallModelMonitor());
-            builder->getStressValues(   para->getParH(level)->stressBC.normalX,  para->getParH(level)->stressBC.normalY,  para->getParH(level)->stressBC.normalZ, 
+            builder->getStressValues(   para->getParH(level)->stressBC.normalX,  para->getParH(level)->stressBC.normalY,  para->getParH(level)->stressBC.normalZ,
                                         para->getParH(level)->stressBC.Vx,       para->getParH(level)->stressBC.Vy,       para->getParH(level)->stressBC.Vz,
                                         para->getParH(level)->stressBC.Vx1,      para->getParH(level)->stressBC.Vy1,      para->getParH(level)->stressBC.Vz1,
-                                        para->getParH(level)->stressBC.k,        para->getParH(level)->stressBC.kN,       
-                                        para->getParH(level)->wallModel.samplingOffset, para->getParH(level)->wallModel.z0, 
+                                        para->getParH(level)->stressBC.k,        para->getParH(level)->stressBC.kN,
+                                        para->getParH(level)->wallModel.samplingOffset, para->getParH(level)->wallModel.z0,
                                         level);
 
             cudaMemoryManager->cudaCopyStressBC(level);
@@ -187,7 +270,7 @@ void GridGenerator::allocArrays_BoundaryValues()
         para->getParH(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx();
         para->getParD(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx();
     }
-    
+
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfVelocityValues = int(builder->getVelocitySize(level));
@@ -204,7 +287,8 @@ void GridGenerator::allocArrays_BoundaryValues()
             cudaMemoryManager->cudaAllocVeloBC(level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-            builder->getVelocityValues(para->getParH(level)->velocityBC.Vx, para->getParH(level)->velocityBC.Vy, para->getParH(level)->velocityBC.Vz, para->getParH(level)->velocityBC.k, level);
+            builder->getVelocityValues(para->getParH(level)->velocityBC.Vx, para->getParH(level)->velocityBC.Vy,
+                                       para->getParH(level)->velocityBC.Vz, para->getParH(level)->velocityBC.k, level);
 
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -242,6 +326,100 @@ void GridGenerator::allocArrays_BoundaryValues()
         para->getParD(level)->numberOfVeloBCnodesRead = para->getParH(level)->velocityBC.numberOfBCnodes * para->getD3Qxx();
     }
 
+    for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
+        const auto numberOfPrecursorValues = int(builder->getPrecursorSize(level));
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "size precursor level " << level << " : " << numberOfPrecursorValues << "\n";
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1;
+        para->getParH(level)->precursorBC.sizeQ = blocks * para->getParH(level)->numberofthreads;
+        para->getParD(level)->precursorBC.sizeQ = para->getParH(level)->precursorBC.sizeQ;
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        para->getParH(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues;
+        para->getParD(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues;
+        para->getParH(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx();
+        para->getParD(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx();
+
+        if (numberOfPrecursorValues > 1)
+        {
+            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            cudaMemoryManager->cudaAllocPrecursorBC(level);
+            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            builder->getPrecursorValues(
+                    para->getParH(level)->precursorBC.planeNeighbor0PP, para->getParH(level)->precursorBC.planeNeighbor0PM,
+                    para->getParH(level)->precursorBC.planeNeighbor0MP, para->getParH(level)->precursorBC.planeNeighbor0MM,
+                    para->getParH(level)->precursorBC.weights0PP, para->getParH(level)->precursorBC.weights0PM,
+                    para->getParH(level)->precursorBC.weights0MP, para->getParH(level)->precursorBC.weights0MM,
+                    para->getParH(level)->precursorBC.k, para->getParH(level)->transientBCInputFileReader, para->getParH(level)->precursorBC.numberOfPrecursorNodes,
+                    para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.timeStepsBetweenReads,
+                    para->getParH(level)->precursorBC.velocityX, para->getParH(level)->precursorBC.velocityY, para->getParH(level)->precursorBC.velocityZ,
+                    level);
+            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            para->getParD(level)->precursorBC.numberOfPrecursorNodes = para->getParH(level)->precursorBC.numberOfPrecursorNodes;
+            para->getParD(level)->precursorBC.numberOfQuantities = para->getParH(level)->precursorBC.numberOfQuantities;
+            para->getParD(level)->precursorBC.timeStepsBetweenReads = para->getParH(level)->precursorBC.timeStepsBetweenReads;
+            para->getParD(level)->precursorBC.velocityX = para->getParH(level)->precursorBC.velocityX;
+            para->getParD(level)->precursorBC.velocityY = para->getParH(level)->precursorBC.velocityY;
+            para->getParD(level)->precursorBC.velocityZ = para->getParH(level)->precursorBC.velocityZ;
+
+            for(auto reader : para->getParH(level)->transientBCInputFileReader)
+            {
+                if(reader->getNumberOfQuantities() != para->getParD(level)->precursorBC.numberOfQuantities)
+                    throw std::runtime_error(
+                        "Number of quantities in reader and number of quantities needed for precursor don't match!");
+            }
+
+            cudaMemoryManager->cudaCopyPrecursorBC(level);
+            cudaMemoryManager->cudaAllocPrecursorData(level);
+
+            // read first timestep of precursor into next and copy to next on device
+            for(auto reader : para->getParH(level)->transientBCInputFileReader)
+            {
+                reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 0);
+            }
+
+            cudaMemoryManager->cudaCopyPrecursorData(level);
+
+            //switch next with last pointers
+            real* tmp = para->getParD(level)->precursorBC.last;
+            para->getParD(level)->precursorBC.last = para->getParD(level)->precursorBC.next;
+            para->getParD(level)->precursorBC.next = tmp;
+
+            //read second timestep of precursor into next and copy next to device
+            real nextTime = para->getParD(level)->precursorBC.timeStepsBetweenReads*pow(2,-((real)level))*para->getTimeRatio();
+            for(auto reader : para->getParH(level)->transientBCInputFileReader)
+            {
+                reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, nextTime);
+            }
+
+            cudaMemoryManager->cudaCopyPrecursorData(level);
+
+            para->getParD(level)->precursorBC.nPrecursorReads = 1;
+
+
+            //switch next with current pointers
+            tmp = para->getParD(level)->precursorBC.current;
+            para->getParD(level)->precursorBC.current = para->getParD(level)->precursorBC.next;
+            para->getParD(level)->precursorBC.next = tmp;
+
+            //start usual cycle of loading, i.e. read velocities of timestep after current and copy asynchronously to device
+            for(auto reader : para->getParH(level)->transientBCInputFileReader)
+            {
+                reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 2*nextTime);
+            }
+
+            cudaMemoryManager->cudaCopyPrecursorData(level);
+
+            para->getParD(level)->precursorBC.nPrecursorReads = 2;
+        }
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        // advection-diffusion is not implemented for the precursor BC: abort if it is switched on
+        if (para->getDiffOn()==true){
+            throw std::runtime_error(" Advection Diffusion not implemented for Precursor!");
+        }
+        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    }
+
 
 
     if (builder->hasGeometryValues()) {
@@ -303,7 +481,7 @@ void GridGenerator::initalValuesDomainDecompostion()
     if (para->getNumprocs() < 2)
         return;
     if ((para->getNumprocs() > 1) /*&& (procNeighborsSendX.size() == procNeighborsRecvX.size())*/) {
-        
+
         // direction has to be changed in case of periodic BCs and multiple sub domains
         std::vector<int> fillOrder = { 0, 1, 2, 3, 4, 5 };
 
@@ -383,7 +561,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborX[indexProcessNeighbor].index, direction,
                                                    level);
                         if (level != builder->getNumberOfGridLevels() - 1 && para->useReducedCommunicationAfterFtoC)
-                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, indexProcessNeighbor, direction);             
+                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, indexProcessNeighbor, direction);
                         ////////////////////////////////////////////////////////////////////////////////////////
                         cudaMemoryManager->cudaCopyProcessNeighborXIndex(level, indexProcessNeighbor);
                         ////////////////////////////////////////////////////////////////////////////////////////
@@ -446,7 +624,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // malloc on host and device
                         cudaMemoryManager->cudaAllocProcessNeighborY(level, indexProcessNeighbor);
-                        ////////////////////////////////////////////////////////////////////////////////////////                        
+                        ////////////////////////////////////////////////////////////////////////////////////////
                         // init index arrays
                         builder->getSendIndices(para->getParH(level)->sendProcessNeighborY[indexProcessNeighbor].index, direction, level);
                         builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborY[indexProcessNeighbor].index, direction,
@@ -465,7 +643,7 @@ void GridGenerator::initalValuesDomainDecompostion()
 
                     if (tempSend > 0) {
                         int indexProcessNeighbor = (int)para->getParH(level)->sendProcessNeighborZ.size();
-    
+
                         para->getParH(level)->sendProcessNeighborZ.emplace_back();
                         para->getParD(level)->sendProcessNeighborZ.emplace_back();
                         para->getParH(level)->recvProcessNeighborZ.emplace_back();
@@ -755,9 +933,9 @@ void GridGenerator::allocArrays_BoundaryQs()
             //preprocessing
             real* QQ = para->getParH(i)->pressureBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->pressureBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->pressureBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-            
+
             builder->getPressureQs(Q.q27, i);
 
 
@@ -802,9 +980,9 @@ void GridGenerator::allocArrays_BoundaryQs()
             //preprocessing
             real* QQ = para->getParH(i)->slipBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->slipBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->slipBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-            
+
             builder->getSlipQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaCopySlipBC(i);
@@ -822,9 +1000,9 @@ void GridGenerator::allocArrays_BoundaryQs()
             //preprocessing
             real* QQ = para->getParH(i)->stressBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->stressBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->stressBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-            
+
             builder->getStressQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaCopyStressBC(i);
@@ -842,7 +1020,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             //preprocessing
             real* QQ = para->getParH(i)->velocityBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->velocityBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->velocityBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
             builder->getVelocityQs(Q.q27, i);
 
@@ -874,6 +1052,50 @@ void GridGenerator::allocArrays_BoundaryQs()
         }
     }
 
+    for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { // precursor boundary condition, handled once per grid level
+        const auto numberOfPrecursorNodes = int(builder->getPrecursorSize(i));
+        if (numberOfPrecursorNodes > 0)
+        {
+            std::cout << "size velocity level " << i << " : " << numberOfPrecursorNodes << std::endl; // NOTE(review): label says "velocity" but the printed value is the precursor node count
+            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            // preprocessing: let Q.q27[dir] point at the per-direction slices of the flat buffer starting at precursorBC.q27[0]
+            real* QQ = para->getParH(i)->precursorBC.q27[0];
+            unsigned int sizeQ = para->getParH(i)->precursorBC.numberOfBCnodes;
+            QforBoundaryConditions Q; // local scratch struct; only its q27 pointer table is used below
+            getPointersToBoundaryConditions(Q, QQ, sizeQ);
+
+            builder->getPrecursorQs(Q.q27, i);
+
+            if (para->getDiffOn()) {
+                throw std::runtime_error("Advection diffusion not implemented for Precursor!");
+                // NOTE(review): disabled code below is unreachable after the throw and uses numberOfVelocityNodes, which is not declared in this loop
+                // para->getParH(i)->TempVel.kTemp = numberOfVelocityNodes;
+                // para->getParD(i)->TempVel.kTemp = numberOfVelocityNodes;
+                // std::cout << "Groesse TempVel.kTemp = " << para->getParH(i)->TempPress.kTemp << std::endl;
+                // std::cout << "getTemperatureInit = " << para->getTemperatureInit() << std::endl;
+                // std::cout << "getTemperatureBC = " << para->getTemperatureBC() << std::endl;
+                // //////////////////////////////////////////////////////////////////////////
+                // cudaMemoryManager->cudaAllocTempVeloBC(i);
+                // //cout << "nach alloc " << std::endl;
+                // //////////////////////////////////////////////////////////////////////////
+                // for (int m = 0; m < numberOfVelocityNodes; m++)
+                // {
+                //     para->getParH(i)->TempVel.temp[m] = para->getTemperatureInit();
+                //     para->getParH(i)->TempVel.tempPulse[m] = para->getTemperatureBC();
+                //     para->getParH(i)->TempVel.velo[m] = para->getVelocity();
+                //     para->getParH(i)->TempVel.k[m] = para->getParH(i)->Qinflow.k[m];
+                // }
+                // //////////////////////////////////////////////////////////////////////////
+                // //cout << "vor copy " << std::endl;
+                // cudaMemoryManager->cudaCopyTempVeloBCHD(i);
+                // //cout << "nach copy " << std::endl;
+                //////////////////////////////////////////////////////////////////////////
+            }
+            cudaMemoryManager->cudaCopyPrecursorBC(i);
+        }
+    }
+
+
 
     for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) {
         const int numberOfGeometryNodes = builder->getGeometrySize(i);
@@ -898,7 +1120,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             //preprocessing
             real* QQ = para->getParH(i)->geometryBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->geometryBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->geometryBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
             //////////////////////////////////////////////////////////////////
 
@@ -948,7 +1170,7 @@ void GridGenerator::allocArrays_BoundaryQs()
 
 void GridGenerator::allocArrays_OffsetScale()
 {
-    for (uint level = 0; level < builder->getNumberOfGridLevels() - 1; level++) 
+    for (uint level = 0; level < builder->getNumberOfGridLevels() - 1; level++)
     {
         const uint numberOfNodesPerLevelCF = builder->getNumberOfNodesCF(level);
         const uint numberOfNodesPerLevelFC = builder->getNumberOfNodesFC(level);
@@ -987,7 +1209,7 @@ void GridGenerator::allocArrays_OffsetScale()
         builder->getOffsetCF(para->getParH(level)->offCF.xOffCF, para->getParH(level)->offCF.yOffCF, para->getParH(level)->offCF.zOffCF, level);
         builder->getOffsetFC(para->getParH(level)->offFC.xOffFC, para->getParH(level)->offFC.yOffFC, para->getParH(level)->offFC.zOffFC, level);
         builder->getGridInterfaceIndices(para->getParH(level)->intCF.ICellCFC, para->getParH(level)->intCF.ICellCFF, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->intFC.ICellFCF, level);
-        
+
         if (para->getUseStreams() || para->getNumprocs() > 1) {
             // split fine-to-coarse indices into border and bulk
             interpolationGrouper->splitFineToCoarseIntoBorderAndBulk(level);
@@ -1060,8 +1282,8 @@ std::string GridGenerator::verifyNeighborIndices(int level) const
     int wrongNeighbors = 0;
     int stopperNodes = 0;
 
-    for (uint index = 0; index < para->getParH(level)->numberOfNodes; index++)
-        oss << verifyNeighborIndex(level, index, invalidNodes, stopperNodes, wrongNeighbors);
+    for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
+        oss << verifyNeighborIndex(level, (int)index, invalidNodes, stopperNodes, wrongNeighbors);
 
 
     oss << "invalid nodes found: " << invalidNodes << "\n";
@@ -1090,7 +1312,7 @@ std::string GridGenerator::verifyNeighborIndex(int level, int index , int &inval
 
     //std::cout << para->getParH(level)->coordinateX[1] << ", " << para->getParH(level)->coordinateY[1] << ", " << para->getParH(level)->coordinateZ[1] << std::endl;
     //std::cout << para->getParH(level)->coordinateX[para->getParH(level)->numberOfNodes - 1] << ", " << para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes - 1] << ", " << para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes - 1] << std::endl;
-    
+
     real maxX = para->getParH(level)->coordinateX[para->getParH(level)->numberOfNodes - 1] - delta;
     real maxY = para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes - 1] - delta;
     real maxZ = para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes - 1] - delta;
@@ -1131,8 +1353,8 @@ std::string GridGenerator::checkNeighbor(int level, real x, real y, real z, int
 
     if (!neighborValid) {
         oss << "NeighborX invalid from: (" << x << ", " << y << ", " << z << "), index: " << index << ", "
-            << direction << " neighborIndex: " << neighborIndex << 
-            ", actual neighborCoords : (" << neighborCoordX << ", " << neighborCoordY << ", " << neighborCoordZ << 
+            << direction << " neighborIndex: " << neighborIndex <<
+            ", actual neighborCoords : (" << neighborCoordX << ", " << neighborCoordY << ", " << neighborCoordZ <<
             "), expected neighborCoords : (" << neighborX << ", " << neighborY << ", " << neighborZ << ")\n";
         numberOfWrongNeihgbors++;
     }
@@ -1140,31 +1362,31 @@ std::string GridGenerator::checkNeighbor(int level, real x, real y, real z, int
 }
 
 void GridGenerator::getPointersToBoundaryConditions(QforBoundaryConditions& boundaryConditionStruct, real* subgridDistances, const unsigned int numberOfBCnodes){
-    boundaryConditionStruct.q27[DIR_P00] =    &subgridDistances[DIR_P00   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_M00] =    &subgridDistances[DIR_M00   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0P0] =    &subgridDistances[DIR_0P0   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0M0] =    &subgridDistances[DIR_0M0   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_00P] =    &subgridDistances[DIR_00P   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_00M] =    &subgridDistances[DIR_00M   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PP0] =   &subgridDistances[DIR_PP0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MM0] =   &subgridDistances[DIR_MM0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PM0] =   &subgridDistances[DIR_PM0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MP0] =   &subgridDistances[DIR_MP0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_P0P] =   &subgridDistances[DIR_P0P  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_M0M] =   &subgridDistances[DIR_M0M  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_P0M] =   &subgridDistances[DIR_P0M  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_M0P] =   &subgridDistances[DIR_M0P  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0PP] =   &subgridDistances[DIR_0PP  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0MM] =   &subgridDistances[DIR_0MM  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0PM] =   &subgridDistances[DIR_0PM  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0MP] =   &subgridDistances[DIR_0MP  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_000] = &subgridDistances[DIR_000* numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PPP] =  &subgridDistances[DIR_PPP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MMP] =  &subgridDistances[DIR_MMP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PMP] =  &subgridDistances[DIR_PMP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MPP] =  &subgridDistances[DIR_MPP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PPM] =  &subgridDistances[DIR_PPM * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MMM] =  &subgridDistances[DIR_MMM * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PMM] =  &subgridDistances[DIR_PMM * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MPM] =  &subgridDistances[DIR_MPM * numberOfBCnodes];
-}
\ No newline at end of file
+    boundaryConditionStruct.q27[DIR_P00] = &subgridDistances[DIR_P00 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M00] = &subgridDistances[DIR_M00 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0P0] = &subgridDistances[DIR_0P0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0M0] = &subgridDistances[DIR_0M0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_00P] = &subgridDistances[DIR_00P * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_00M] = &subgridDistances[DIR_00M * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PP0] = &subgridDistances[DIR_PP0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MM0] = &subgridDistances[DIR_MM0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PM0] = &subgridDistances[DIR_PM0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MP0] = &subgridDistances[DIR_MP0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_P0P] = &subgridDistances[DIR_P0P * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M0M] = &subgridDistances[DIR_M0M * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_P0M] = &subgridDistances[DIR_P0M * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M0P] = &subgridDistances[DIR_M0P * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0PP] = &subgridDistances[DIR_0PP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0MM] = &subgridDistances[DIR_0MM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0PM] = &subgridDistances[DIR_0PM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0MP] = &subgridDistances[DIR_0MP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_000] = &subgridDistances[DIR_000 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PPP] = &subgridDistances[DIR_PPP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MMP] = &subgridDistances[DIR_MMP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PMP] = &subgridDistances[DIR_PMP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MPP] = &subgridDistances[DIR_MPP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PPM] = &subgridDistances[DIR_PPM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MMM] = &subgridDistances[DIR_MMM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PMM] = &subgridDistances[DIR_PMM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MPM] = &subgridDistances[DIR_MPM * numberOfBCnodes];
+}
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
index d2f56e1df4ee5658c61b8e8a3e94a820d1a4f2f1..c97ed02a64da1d5fafa18150c75d149f96484d44 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
@@ -40,6 +40,7 @@
 
 #include "LBM/LB.h"
 
+
 class Parameter;
 class GridBuilder;
 class IndexRearrangementForStreams;
@@ -75,8 +76,10 @@ public:
     //! \brief allocates and initialized the sub-grid distances at the boundary conditions
     void allocArrays_BoundaryQs() override;
     void allocArrays_OffsetScale() override;
-    void allocArrays_fluidNodeIndices() override;
-    void allocArrays_fluidNodeIndicesBorder() override;
+    void allocArrays_taggedFluidNodes() override;
+
+    void tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) override;
+    void sortFluidNodeTags() override;
 
     virtual void setDimensions() override;
     virtual void setBoundingBox() override;
diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp
index bff054eb174a0f5fa34119deedde6f1c9733d83c..b1c398638cff1ec1b6d52f59f8e773183e270331 100644
--- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp
+++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp
@@ -35,6 +35,11 @@ void BoundaryConditionFactory::setStressBoundaryCondition(const StressBC boundar
     this->stressBoundaryCondition = boundaryConditionType;
 }
 
+void BoundaryConditionFactory::setPrecursorBoundaryCondition(const PrecursorBC boundaryConditionType)
+{
+    this->precursorBoundaryCondition = boundaryConditionType;
+}
+
 boundaryCondition BoundaryConditionFactory::getVelocityBoundaryConditionPost(bool isGeometryBC) const
 {
     const VelocityBC &boundaryCondition =
@@ -132,6 +137,22 @@ boundaryCondition BoundaryConditionFactory::getPressureBoundaryConditionPre() co
         case PressureBC::OutflowNonReflective:
             return QPressNoRhoDev27;
             break;
+        case PressureBC::OutflowNonReflectivePressureCorrection:
+            return QPressZeroRhoOutflowDev27;
+        default:
+            return nullptr;
+    }
+}
+
+precursorBoundaryConditionFunc BoundaryConditionFactory::getPrecursorBoundaryConditionPost() const
+{
+    switch (this->precursorBoundaryCondition) {
+        case PrecursorBC::VelocityPrecursor:
+            return QPrecursorDevCompZeroPress;
+            break;
+        case PrecursorBC::DistributionsPrecursor:
+            return PrecursorDevDistributions;
+            break;
         default:
             return nullptr;
     }
diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h
index 9d6872c4847be72dff4be7137b774c8082e39e34..c6877cbfeffe5b32c0c2d336e46b02d68cd946a3 100644
--- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h
+++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h
@@ -42,11 +42,13 @@
 #include "Parameter/Parameter.h"
 #include "gpu/GridGenerator/grid/BoundaryConditions/Side.h"
 
+
 struct LBMSimulationParameter;
 class Parameter;
 
 using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>;
 using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>;
+using precursorBoundaryConditionFunc = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real timeRatio, real velocityRatio)>;
 
 class BoundaryConditionFactory
 {
@@ -109,6 +111,8 @@ public:
         PressureNonEquilibriumCompressible,
         //! - OutflowNonReflective = outflow boundary condition, should be combined with VelocityAndPressureCompressible
         OutflowNonReflective,
+        //! - OutflowNonReflectivePressureCorrection = like OutflowNonReflective, but also reduces pressure overshoot
+        OutflowNonReflectivePressureCorrection,
         //! - NotSpecified =  the user did not set a boundary condition
         NotSpecified
     };
@@ -128,11 +132,21 @@ public:
     // enum class OutflowBoundaryCondition {};  // TODO:
     // https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16
 
+    enum class PrecursorBC {
+        //! - VelocityPrecursor
+        VelocityPrecursor,
+        //! - DistributionsPrecursor
+        DistributionsPrecursor,
+        //! - NotSpecified =  the user did not set a boundary condition
+        NotSpecified
+    };
+
     void setVelocityBoundaryCondition(const BoundaryConditionFactory::VelocityBC boundaryConditionType);
     void setNoSlipBoundaryCondition(const BoundaryConditionFactory::NoSlipBC boundaryConditionType);
     void setSlipBoundaryCondition(const BoundaryConditionFactory::SlipBC boundaryConditionType);
     void setPressureBoundaryCondition(const BoundaryConditionFactory::PressureBC boundaryConditionType);
     void setStressBoundaryCondition(const BoundaryConditionFactory::StressBC boundaryConditionType);
+    void setPrecursorBoundaryCondition(const BoundaryConditionFactory::PrecursorBC boundaryConditionType);
     //! \brief set a boundary condition for the geometry
     //! param boundaryConditionType: a velocity, no-slip or slip boundary condition
     //! \details suggestions for boundaryConditionType:
@@ -152,6 +166,8 @@ public:
     [[nodiscard]] boundaryCondition getSlipBoundaryConditionPost(bool isGeometryBC = false) const;
     [[nodiscard]] boundaryCondition getPressureBoundaryConditionPre() const;
     [[nodiscard]] boundaryCondition getGeometryBoundaryConditionPost() const;
+    [[nodiscard]] precursorBoundaryConditionFunc getPrecursorBoundaryConditionPost() const;
+
 
     [[nodiscard]] boundaryConditionWithParameter getStressBoundaryConditionPost() const;
 
@@ -162,6 +178,7 @@ private:
     PressureBC pressureBoundaryCondition = PressureBC::NotSpecified;
     std::variant<VelocityBC, NoSlipBC, SlipBC> geometryBoundaryCondition = NoSlipBC::NoSlipImplicitBounceBack;
     StressBC stressBoundaryCondition = StressBC::NotSpecified;
+    PrecursorBC precursorBoundaryCondition = PrecursorBC::NotSpecified;
 
     // OutflowBoundaryConditon outflowBC // TODO: https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16
 };
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
index 8f54358e04063c9063c873caf02a86e76bb7f936..04f6afe4cf9ebd99dc293ded16f55a56f0d77036 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
@@ -74,7 +74,7 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 	uint* neighborZ,
 	real* distributions,
 	real* distributionsAD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* forces,
 	bool isEvenTimestep)
 {
@@ -100,7 +100,7 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 
 	//////////////////////////////////////////////////////////////////////////
 	// run for all indices in size_Mat and fluid nodes
-	if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID))
+	if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID))
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -109,125 +109,125 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 		Distributions27 dist;
 		if (isEvenTimestep)
 		{
-			dist.f[DIR_P00   ] = &distributions[DIR_P00   *size_Mat];
-			dist.f[DIR_M00   ] = &distributions[DIR_M00   *size_Mat];
-			dist.f[DIR_0P0   ] = &distributions[DIR_0P0   *size_Mat];
-			dist.f[DIR_0M0   ] = &distributions[DIR_0M0   *size_Mat];
-			dist.f[DIR_00P   ] = &distributions[DIR_00P   *size_Mat];
-			dist.f[DIR_00M   ] = &distributions[DIR_00M   *size_Mat];
-			dist.f[DIR_PP0  ] = &distributions[DIR_PP0  *size_Mat];
-			dist.f[DIR_MM0  ] = &distributions[DIR_MM0  *size_Mat];
-			dist.f[DIR_PM0  ] = &distributions[DIR_PM0  *size_Mat];
-			dist.f[DIR_MP0  ] = &distributions[DIR_MP0  *size_Mat];
-			dist.f[DIR_P0P  ] = &distributions[DIR_P0P  *size_Mat];
-			dist.f[DIR_M0M  ] = &distributions[DIR_M0M  *size_Mat];
-			dist.f[DIR_P0M  ] = &distributions[DIR_P0M  *size_Mat];
-			dist.f[DIR_M0P  ] = &distributions[DIR_M0P  *size_Mat];
-			dist.f[DIR_0PP  ] = &distributions[DIR_0PP  *size_Mat];
-			dist.f[DIR_0MM  ] = &distributions[DIR_0MM  *size_Mat];
-			dist.f[DIR_0PM  ] = &distributions[DIR_0PM  *size_Mat];
-			dist.f[DIR_0MP  ] = &distributions[DIR_0MP  *size_Mat];
-			dist.f[DIR_000] = &distributions[DIR_000*size_Mat];
-			dist.f[DIR_PPP ] = &distributions[DIR_PPP *size_Mat];
-			dist.f[DIR_MMP ] = &distributions[DIR_MMP *size_Mat];
-			dist.f[DIR_PMP ] = &distributions[DIR_PMP *size_Mat];
-			dist.f[DIR_MPP ] = &distributions[DIR_MPP *size_Mat];
-			dist.f[DIR_PPM ] = &distributions[DIR_PPM *size_Mat];
-			dist.f[DIR_MMM ] = &distributions[DIR_MMM *size_Mat];
-			dist.f[DIR_PMM ] = &distributions[DIR_PMM *size_Mat];
-			dist.f[DIR_MPM ] = &distributions[DIR_MPM *size_Mat];
+			dist.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+			dist.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+			dist.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+			dist.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+			dist.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+			dist.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+			dist.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+			dist.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+			dist.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+			dist.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+			dist.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+			dist.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+			dist.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+			dist.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+			dist.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+			dist.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+			dist.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+			dist.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+			dist.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+			dist.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+			dist.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+			dist.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+			dist.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+			dist.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+			dist.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+			dist.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+			dist.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
 		}
 		else
 		{
-			dist.f[DIR_M00   ] = &distributions[DIR_P00   *size_Mat];
-			dist.f[DIR_P00   ] = &distributions[DIR_M00   *size_Mat];
-			dist.f[DIR_0M0   ] = &distributions[DIR_0P0   *size_Mat];
-			dist.f[DIR_0P0   ] = &distributions[DIR_0M0   *size_Mat];
-			dist.f[DIR_00M   ] = &distributions[DIR_00P   *size_Mat];
-			dist.f[DIR_00P   ] = &distributions[DIR_00M   *size_Mat];
-			dist.f[DIR_MM0  ] = &distributions[DIR_PP0  *size_Mat];
-			dist.f[DIR_PP0  ] = &distributions[DIR_MM0  *size_Mat];
-			dist.f[DIR_MP0  ] = &distributions[DIR_PM0  *size_Mat];
-			dist.f[DIR_PM0  ] = &distributions[DIR_MP0  *size_Mat];
-			dist.f[DIR_M0M  ] = &distributions[DIR_P0P  *size_Mat];
-			dist.f[DIR_P0P  ] = &distributions[DIR_M0M  *size_Mat];
-			dist.f[DIR_M0P  ] = &distributions[DIR_P0M  *size_Mat];
-			dist.f[DIR_P0M  ] = &distributions[DIR_M0P  *size_Mat];
-			dist.f[DIR_0MM  ] = &distributions[DIR_0PP  *size_Mat];
-			dist.f[DIR_0PP  ] = &distributions[DIR_0MM  *size_Mat];
-			dist.f[DIR_0MP  ] = &distributions[DIR_0PM  *size_Mat];
-			dist.f[DIR_0PM  ] = &distributions[DIR_0MP  *size_Mat];
-			dist.f[DIR_000] = &distributions[DIR_000*size_Mat];
-			dist.f[DIR_MMM ] = &distributions[DIR_PPP *size_Mat];
-			dist.f[DIR_PPM ] = &distributions[DIR_MMP *size_Mat];
-			dist.f[DIR_MPM ] = &distributions[DIR_PMP *size_Mat];
-			dist.f[DIR_PMM ] = &distributions[DIR_MPP *size_Mat];
-			dist.f[DIR_MMP ] = &distributions[DIR_PPM *size_Mat];
-			dist.f[DIR_PPP ] = &distributions[DIR_MMM *size_Mat];
-			dist.f[DIR_MPP ] = &distributions[DIR_PMM *size_Mat];
-			dist.f[DIR_PMP ] = &distributions[DIR_MPM *size_Mat];
+			dist.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+			dist.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+			dist.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+			dist.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+			dist.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+			dist.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+			dist.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+			dist.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+			dist.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+			dist.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+			dist.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+			dist.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+			dist.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+			dist.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+			dist.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+			dist.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+			dist.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+			dist.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+			dist.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+			dist.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+			dist.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+			dist.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
+			dist.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+			dist.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+			dist.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+			dist.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+			dist.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
 		}
 		////////////////////////////////////////////////////////////////////////////////
 		Distributions27 distAD;
 		if (isEvenTimestep)
 		{
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		else
 		{
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		////////////////////////////////////////////////////////////////////////////////
 		//! - Set neighbor indices (necessary for indirect addressing)
@@ -241,63 +241,63 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Set local distributions Fluid
 		//!
-		real fcbb = (dist.f[DIR_P00   ])[k];
-		real fabb = (dist.f[DIR_M00   ])[kw];
-		real fbcb = (dist.f[DIR_0P0   ])[k];
-		real fbab = (dist.f[DIR_0M0   ])[ks];
-		real fbbc = (dist.f[DIR_00P   ])[k];
-		real fbba = (dist.f[DIR_00M   ])[kb];
-		real fccb = (dist.f[DIR_PP0  ])[k];
-		real faab = (dist.f[DIR_MM0  ])[ksw];
-		real fcab = (dist.f[DIR_PM0  ])[ks];
-		real facb = (dist.f[DIR_MP0  ])[kw];
-		real fcbc = (dist.f[DIR_P0P  ])[k];
-		real faba = (dist.f[DIR_M0M  ])[kbw];
-		real fcba = (dist.f[DIR_P0M  ])[kb];
-		real fabc = (dist.f[DIR_M0P  ])[kw];
-		real fbcc = (dist.f[DIR_0PP  ])[k];
-		real fbaa = (dist.f[DIR_0MM  ])[kbs];
-		real fbca = (dist.f[DIR_0PM  ])[kb];
-		real fbac = (dist.f[DIR_0MP  ])[ks];
+		real fcbb = (dist.f[DIR_P00])[k];
+		real fabb = (dist.f[DIR_M00])[kw];
+		real fbcb = (dist.f[DIR_0P0])[k];
+		real fbab = (dist.f[DIR_0M0])[ks];
+		real fbbc = (dist.f[DIR_00P])[k];
+		real fbba = (dist.f[DIR_00M])[kb];
+		real fccb = (dist.f[DIR_PP0])[k];
+		real faab = (dist.f[DIR_MM0])[ksw];
+		real fcab = (dist.f[DIR_PM0])[ks];
+		real facb = (dist.f[DIR_MP0])[kw];
+		real fcbc = (dist.f[DIR_P0P])[k];
+		real faba = (dist.f[DIR_M0M])[kbw];
+		real fcba = (dist.f[DIR_P0M])[kb];
+		real fabc = (dist.f[DIR_M0P])[kw];
+		real fbcc = (dist.f[DIR_0PP])[k];
+		real fbaa = (dist.f[DIR_0MM])[kbs];
+		real fbca = (dist.f[DIR_0PM])[kb];
+		real fbac = (dist.f[DIR_0MP])[ks];
 		real fbbb = (dist.f[DIR_000])[k];
-		real fccc = (dist.f[DIR_PPP ])[k];
-		real faac = (dist.f[DIR_MMP ])[ksw];
-		real fcac = (dist.f[DIR_PMP ])[ks];
-		real facc = (dist.f[DIR_MPP ])[kw];
-		real fcca = (dist.f[DIR_PPM ])[kb];
-		real faaa = (dist.f[DIR_MMM ])[kbsw];
-		real fcaa = (dist.f[DIR_PMM ])[kbs];
-		real faca = (dist.f[DIR_MPM ])[kbw];
+		real fccc = (dist.f[DIR_PPP])[k];
+		real faac = (dist.f[DIR_MMP])[ksw];
+		real fcac = (dist.f[DIR_PMP])[ks];
+		real facc = (dist.f[DIR_MPP])[kw];
+		real fcca = (dist.f[DIR_PPM])[kb];
+		real faaa = (dist.f[DIR_MMM])[kbsw];
+		real fcaa = (dist.f[DIR_PMM])[kbs];
+		real faca = (dist.f[DIR_MPM])[kbw];
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Set local distributions Advection Diffusion
 		//!
-		real mfcbb = (distAD.f[DIR_P00   ])[k];
-		real mfabb = (distAD.f[DIR_M00   ])[kw];
-		real mfbcb = (distAD.f[DIR_0P0   ])[k];
-		real mfbab = (distAD.f[DIR_0M0   ])[ks];
-		real mfbbc = (distAD.f[DIR_00P   ])[k];
-		real mfbba = (distAD.f[DIR_00M   ])[kb];
-		real mfccb = (distAD.f[DIR_PP0  ])[k];
-		real mfaab = (distAD.f[DIR_MM0  ])[ksw];
-		real mfcab = (distAD.f[DIR_PM0  ])[ks];
-		real mfacb = (distAD.f[DIR_MP0  ])[kw];
-		real mfcbc = (distAD.f[DIR_P0P  ])[k];
-		real mfaba = (distAD.f[DIR_M0M  ])[kbw];
-		real mfcba = (distAD.f[DIR_P0M  ])[kb];
-		real mfabc = (distAD.f[DIR_M0P  ])[kw];
-		real mfbcc = (distAD.f[DIR_0PP  ])[k];
-		real mfbaa = (distAD.f[DIR_0MM  ])[kbs];
-		real mfbca = (distAD.f[DIR_0PM  ])[kb];
-		real mfbac = (distAD.f[DIR_0MP  ])[ks];
+		real mfcbb = (distAD.f[DIR_P00])[k];
+		real mfabb = (distAD.f[DIR_M00])[kw];
+		real mfbcb = (distAD.f[DIR_0P0])[k];
+		real mfbab = (distAD.f[DIR_0M0])[ks];
+		real mfbbc = (distAD.f[DIR_00P])[k];
+		real mfbba = (distAD.f[DIR_00M])[kb];
+		real mfccb = (distAD.f[DIR_PP0])[k];
+		real mfaab = (distAD.f[DIR_MM0])[ksw];
+		real mfcab = (distAD.f[DIR_PM0])[ks];
+		real mfacb = (distAD.f[DIR_MP0])[kw];
+		real mfcbc = (distAD.f[DIR_P0P])[k];
+		real mfaba = (distAD.f[DIR_M0M])[kbw];
+		real mfcba = (distAD.f[DIR_P0M])[kb];
+		real mfabc = (distAD.f[DIR_M0P])[kw];
+		real mfbcc = (distAD.f[DIR_0PP])[k];
+		real mfbaa = (distAD.f[DIR_0MM])[kbs];
+		real mfbca = (distAD.f[DIR_0PM])[kb];
+		real mfbac = (distAD.f[DIR_0MP])[ks];
 		real mfbbb = (distAD.f[DIR_000])[k];
-		real mfccc = (distAD.f[DIR_PPP ])[k];
-		real mfaac = (distAD.f[DIR_MMP ])[ksw];
-		real mfcac = (distAD.f[DIR_PMP ])[ks];
-		real mfacc = (distAD.f[DIR_MPP ])[kw];
-		real mfcca = (distAD.f[DIR_PPM ])[kb];
-		real mfaaa = (distAD.f[DIR_MMM ])[kbsw];
-		real mfcaa = (distAD.f[DIR_PMM ])[kbs];
-		real mfaca = (distAD.f[DIR_MPM ])[kbw];
+		real mfccc = (distAD.f[DIR_PPP])[k];
+		real mfaac = (distAD.f[DIR_MMP])[ksw];
+		real mfcac = (distAD.f[DIR_PMP])[ks];
+		real mfacc = (distAD.f[DIR_MPP])[kw];
+		real mfcca = (distAD.f[DIR_PPM])[kb];
+		real mfaaa = (distAD.f[DIR_MMM])[kbsw];
+		real mfcaa = (distAD.f[DIR_PMM])[kbs];
+		real mfaca = (distAD.f[DIR_MPM])[kbw];
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
 		//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -503,33 +503,33 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 		//! stored arrays dependent on timestep is based on the esoteric twist algorithm
 		//! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
 		//!
-		(distAD.f[DIR_P00   ])[k   ] = mfabb;
-		(distAD.f[DIR_M00   ])[kw  ] = mfcbb;
-		(distAD.f[DIR_0P0   ])[k   ] = mfbab;
-		(distAD.f[DIR_0M0   ])[ks  ] = mfbcb;
-		(distAD.f[DIR_00P   ])[k   ] = mfbba;
-		(distAD.f[DIR_00M   ])[kb  ] = mfbbc;
-		(distAD.f[DIR_PP0  ])[k   ] = mfaab;
-		(distAD.f[DIR_MM0  ])[ksw ] = mfccb;
-		(distAD.f[DIR_PM0  ])[ks  ] = mfacb;
-		(distAD.f[DIR_MP0  ])[kw  ] = mfcab;
-		(distAD.f[DIR_P0P  ])[k   ] = mfaba;
-		(distAD.f[DIR_M0M  ])[kbw ] = mfcbc;
-		(distAD.f[DIR_P0M  ])[kb  ] = mfabc;
-		(distAD.f[DIR_M0P  ])[kw  ] = mfcba;
-		(distAD.f[DIR_0PP  ])[k   ] = mfbaa;
-		(distAD.f[DIR_0MM  ])[kbs ] = mfbcc;
-		(distAD.f[DIR_0PM  ])[kb  ] = mfbac;
-		(distAD.f[DIR_0MP  ])[ks  ] = mfbca;
+		(distAD.f[DIR_P00])[k   ] = mfabb;
+		(distAD.f[DIR_M00])[kw  ] = mfcbb;
+		(distAD.f[DIR_0P0])[k   ] = mfbab;
+		(distAD.f[DIR_0M0])[ks  ] = mfbcb;
+		(distAD.f[DIR_00P])[k   ] = mfbba;
+		(distAD.f[DIR_00M])[kb  ] = mfbbc;
+		(distAD.f[DIR_PP0])[k   ] = mfaab;
+		(distAD.f[DIR_MM0])[ksw ] = mfccb;
+		(distAD.f[DIR_PM0])[ks  ] = mfacb;
+		(distAD.f[DIR_MP0])[kw  ] = mfcab;
+		(distAD.f[DIR_P0P])[k   ] = mfaba;
+		(distAD.f[DIR_M0M])[kbw ] = mfcbc;
+		(distAD.f[DIR_P0M])[kb  ] = mfabc;
+		(distAD.f[DIR_M0P])[kw  ] = mfcba;
+		(distAD.f[DIR_0PP])[k   ] = mfbaa;
+		(distAD.f[DIR_0MM])[kbs ] = mfbcc;
+		(distAD.f[DIR_0PM])[kb  ] = mfbac;
+		(distAD.f[DIR_0MP])[ks  ] = mfbca;
 		(distAD.f[DIR_000])[k   ] = mfbbb;
-		(distAD.f[DIR_PPP ])[k   ] = mfaaa;
-		(distAD.f[DIR_PMP ])[ks  ] = mfaca;
-		(distAD.f[DIR_PPM ])[kb  ] = mfaac;
-		(distAD.f[DIR_PMM ])[kbs ] = mfacc;
-		(distAD.f[DIR_MPP ])[kw  ] = mfcaa;
-		(distAD.f[DIR_MMP ])[ksw ] = mfcca;
-		(distAD.f[DIR_MPM ])[kbw ] = mfcac;
-		(distAD.f[DIR_MMM ])[kbsw] = mfccc;
+		(distAD.f[DIR_PPP])[k   ] = mfaaa;
+		(distAD.f[DIR_PMP])[ks  ] = mfaca;
+		(distAD.f[DIR_PPM])[kb  ] = mfaac;
+		(distAD.f[DIR_PMM])[kbs ] = mfacc;
+		(distAD.f[DIR_MPP])[kw  ] = mfcaa;
+		(distAD.f[DIR_MMP])[ksw ] = mfcca;
+		(distAD.f[DIR_MPM])[kbw ] = mfcac;
+		(distAD.f[DIR_MMM])[kbsw] = mfccc;
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
index ecf98a7494a0a5e1c81c1040917e941f066605e6..116ce20389985e0efa650598108224b2e3e25221 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
@@ -20,91 +20,91 @@ __global__ void QADPress7(  real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat, 
+                                       unsigned long long numberOfLBnodes, 
                                        bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -128,24 +128,24 @@ __global__ void QADPress7(  real* DD,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -188,32 +188,32 @@ __global__ void QADPress7(  real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       /*real drho*/;
       //real vx1_Inflow   = zero;
@@ -293,23 +293,23 @@ __global__ void QADPress7(  real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -461,131 +461,131 @@ __global__ void QADPress27( real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat, 
+                                       unsigned long long numberOfLBnodes, 
                                        bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -606,24 +606,24 @@ __global__ void QADPress27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -663,33 +663,33 @@ __global__ void QADPress27( real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       //drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -715,33 +715,33 @@ __global__ void QADPress27( real* DD,
       vx2            =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3            =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -849,86 +849,86 @@ __global__ void QADPress27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
@@ -989,132 +989,132 @@ __global__ void QADPressNEQNeighbor27(
 													unsigned int* neighborX,
 													unsigned int* neighborY,
 													unsigned int* neighborZ,
-													unsigned int size_Mat,
+													unsigned long long numberOfLBnodes,
 													bool isEvenTimestep
 												)
 {
 	Distributions27 D;
 	if (isEvenTimestep == true)
 	{
-		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 
 	Distributions27 D27;
 	if (isEvenTimestep == true)
 	{
-		D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
-		D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
-		D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
-		D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
-		D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
-		D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
-		D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
-		D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
-		D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
-		D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
-		D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
-		D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
-		D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
-		D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
-		D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
-		D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
-		D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
-		D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
-		D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-		D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
-		D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
-		D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
-		D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
-		D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
-		D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
-		D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
-		D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
+		D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+		D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+		D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+		D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+		D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+		D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+		D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+		D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+		D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+		D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+		D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+		D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+		D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+		D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+		D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+		D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+		D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+		D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+		D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+		D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+		D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+		D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+		D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+		D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+		D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+		D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+		D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
-		D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
-		D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
-		D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
-		D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
-		D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
-		D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
-		D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
-		D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
-		D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
-		D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
-		D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
-		D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
-		D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
-		D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
-		D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
-		D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
-		D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
-		D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-		D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
-		D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
-		D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
-		D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
-		D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
-		D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
-		D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
-		D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
+		D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+		D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+		D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+		D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+		D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+		D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+		D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+		D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+		D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+		D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+		D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+		D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+		D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+		D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+		D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+		D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+		D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+		D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+		D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+		D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+		D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+		D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+		D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+		D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+		D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+		D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+		D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1345,33 +1345,33 @@ __global__ void QADPressNEQNeighbor27(
 		unsigned int kNbsw = neighborZ[kNsw];
 		////////////////////////////////////////////////////////////////////////////////
 		//update distributions at neighbor nodes
-        (D27.f[DIR_P00   ])[kNe   ] = f27_W   ;  
-        (D27.f[DIR_M00   ])[kNw   ] = f27_E   ;	
-        (D27.f[DIR_0P0   ])[kNn   ] = f27_S   ;	
-        (D27.f[DIR_0M0   ])[kNs   ] = f27_N   ;	
-        (D27.f[DIR_00P   ])[kNt   ] = f27_B   ;	
-        (D27.f[DIR_00M   ])[kNb   ] = f27_T   ;	
-        (D27.f[DIR_PP0  ])[kNne  ] = f27_SW  ;	
-        (D27.f[DIR_MM0  ])[kNsw  ] = f27_NE  ;	
-        (D27.f[DIR_PM0  ])[kNse  ] = f27_NW  ;	
-        (D27.f[DIR_MP0  ])[kNnw  ] = f27_SE  ;	
-        (D27.f[DIR_P0P  ])[kNte  ] = f27_BW  ;	
-        (D27.f[DIR_M0M  ])[kNbw  ] = f27_TE  ;	
-        (D27.f[DIR_P0M  ])[kNbe  ] = f27_TW  ;	
-        (D27.f[DIR_M0P  ])[kNtw  ] = f27_BE  ;	
-        (D27.f[DIR_0PP  ])[kNtn  ] = f27_BS  ;	
-        (D27.f[DIR_0MM  ])[kNbs  ] = f27_TN  ;	
-        (D27.f[DIR_0PM  ])[kNbn  ] = f27_TS  ;	
-        (D27.f[DIR_0MP  ])[kNts  ] = f27_BN  ;	
+        (D27.f[DIR_P00])[kNe   ] = f27_W   ;  
+        (D27.f[DIR_M00])[kNw   ] = f27_E   ;	
+        (D27.f[DIR_0P0])[kNn   ] = f27_S   ;	
+        (D27.f[DIR_0M0])[kNs   ] = f27_N   ;	
+        (D27.f[DIR_00P])[kNt   ] = f27_B   ;	
+        (D27.f[DIR_00M])[kNb   ] = f27_T   ;	
+        (D27.f[DIR_PP0])[kNne  ] = f27_SW  ;	
+        (D27.f[DIR_MM0])[kNsw  ] = f27_NE  ;	
+        (D27.f[DIR_PM0])[kNse  ] = f27_NW  ;	
+        (D27.f[DIR_MP0])[kNnw  ] = f27_SE  ;	
+        (D27.f[DIR_P0P])[kNte  ] = f27_BW  ;	
+        (D27.f[DIR_M0M])[kNbw  ] = f27_TE  ;	
+        (D27.f[DIR_P0M])[kNbe  ] = f27_TW  ;	
+        (D27.f[DIR_M0P])[kNtw  ] = f27_BE  ;	
+        (D27.f[DIR_0PP])[kNtn  ] = f27_BS  ;	
+        (D27.f[DIR_0MM])[kNbs  ] = f27_TN  ;	
+        (D27.f[DIR_0PM])[kNbn  ] = f27_TS  ;	
+        (D27.f[DIR_0MP])[kNts  ] = f27_BN  ;	
         (D27.f[DIR_000])[kNzero] = f27_ZERO;	
-        (D27.f[DIR_PPP ])[kNtne ] = f27_BSW ;	
-        (D27.f[DIR_MMP ])[kNtsw ] = f27_BNE ;	
-        (D27.f[DIR_PMP ])[kNtse ] = f27_BNW ;	
-        (D27.f[DIR_MPP ])[kNtnw ] = f27_BSE ;	
-        (D27.f[DIR_PPM ])[kNbne ] = f27_TSW ;	
-        (D27.f[DIR_MMM ])[kNbsw ] = f27_TNE ;	
-        (D27.f[DIR_PMM ])[kNbse ] = f27_TNW ;	
-        (D27.f[DIR_MPM ])[kNbnw ] = f27_TSE ;       
+        (D27.f[DIR_PPP])[kNtne ] = f27_BSW ;	
+        (D27.f[DIR_MMP])[kNtsw ] = f27_BNE ;	
+        (D27.f[DIR_PMP])[kNtse ] = f27_BNW ;	
+        (D27.f[DIR_MPP])[kNtnw ] = f27_BSE ;	
+        (D27.f[DIR_PPM])[kNbne ] = f27_TSW ;	
+        (D27.f[DIR_MMM])[kNbsw ] = f27_TNE ;	
+        (D27.f[DIR_PMM])[kNbse ] = f27_TNW ;	
+        (D27.f[DIR_MPM])[kNbnw ] = f27_TSE ;       
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1427,91 +1427,91 @@ __global__ void QADVel7( real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat, 
+                                    unsigned long long numberOfLBnodes, 
                                     bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -1531,12 +1531,12 @@ __global__ void QADVel7( real* DD,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;//, 
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1571,32 +1571,32 @@ __global__ void QADVel7( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       /*real drho*/;
       real vx1_Inflow   = c0o1;
@@ -1676,23 +1676,23 @@ __global__ void QADVel7( real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1844,131 +1844,131 @@ __global__ void QADVel27(real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat, 
+                                    unsigned long long numberOfLBnodes, 
                                     bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1989,24 +1989,24 @@ __global__ void QADVel27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2046,33 +2046,33 @@ __global__ void QADVel27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       ////drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -2098,33 +2098,33 @@ __global__ void QADVel27(real* DD,
       vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_W    = (D27.f[DIR_P00])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      //real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -2233,63 +2233,63 @@ __global__ void QADVel27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -2299,24 +2299,24 @@ __global__ void QADVel27(real* DD,
       //Test
       //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //(D27.f[DIR_M00  ])[kw  ]= four;
-      //(D27.f[DIR_P00  ])[ke  ]= four;
-      //(D27.f[DIR_0M0  ])[ks  ]= four;
-      //(D27.f[DIR_0P0  ])[kn  ]= four;
-      //(D27.f[DIR_00M  ])[kb  ]= four;
-      //(D27.f[DIR_00P  ])[kt  ]= four;
-      //(D27.f[DIR_MM0 ])[ksw ]= four;
-      //(D27.f[DIR_PP0 ])[kne ]= four;
-      //(D27.f[DIR_MP0 ])[knw ]= four;
-      //(D27.f[DIR_PM0 ])[kse ]= four;
-      //(D27.f[DIR_M0M ])[kbw ]= four;
-      //(D27.f[DIR_P0P ])[kte ]= four;
-      //(D27.f[DIR_M0P ])[ktw ]= four;
-      //(D27.f[DIR_P0M ])[kbe ]= four;
-      //(D27.f[DIR_0MM ])[kbs ]= four;
-      //(D27.f[DIR_0PP ])[ktn ]= four;
-      //(D27.f[DIR_0MP ])[kts ]= four;
-      //(D27.f[DIR_0PM ])[kbn ]= four;
+      //(D27.f[DIR_M00])[kw  ]= four;
+      //(D27.f[DIR_P00])[ke  ]= four;
+      //(D27.f[DIR_0M0])[ks  ]= four;
+      //(D27.f[DIR_0P0])[kn  ]= four;
+      //(D27.f[DIR_00M])[kb  ]= four;
+      //(D27.f[DIR_00P])[kt  ]= four;
+      //(D27.f[DIR_MM0])[ksw ]= four;
+      //(D27.f[DIR_PP0])[kne ]= four;
+      //(D27.f[DIR_MP0])[knw ]= four;
+      //(D27.f[DIR_PM0])[kse ]= four;
+      //(D27.f[DIR_M0M])[kbw ]= four;
+      //(D27.f[DIR_P0P])[kte ]= four;
+      //(D27.f[DIR_M0P])[ktw ]= four;
+      //(D27.f[DIR_P0M])[kbe ]= four;
+      //(D27.f[DIR_0MM])[kbs ]= four;
+      //(D27.f[DIR_0PP])[ktn ]= four;
+      //(D27.f[DIR_0MP])[kts ]= four;
+      //(D27.f[DIR_0PM])[kbn ]= four;
       //(D27.f[DIR_MMM])[kbsw]= four;
       //(D27.f[DIR_PPP])[ktne]= four;
       //(D27.f[DIR_MMP])[ktsw]= four;
@@ -2325,24 +2325,24 @@ __global__ void QADVel27(real* DD,
       //(D27.f[DIR_PMP])[ktse]= four;
       //(D27.f[DIR_MPP])[ktnw]= four;
       //(D27.f[DIR_PMM])[kbse]= four;
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
@@ -2351,24 +2351,24 @@ __global__ void QADVel27(real* DD,
       q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
       q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
       q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -2431,91 +2431,91 @@ __global__ void QAD7( real* DD,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat, 
+                                 unsigned long long numberOfLBnodes, 
                                  bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -2539,24 +2539,24 @@ __global__ void QAD7( real* DD,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2599,32 +2599,32 @@ __global__ void QAD7( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3/*, drho*/;
       //drho   =    f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -2696,23 +2696,23 @@ __global__ void QAD7( real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2864,131 +2864,131 @@ __global__ void QADDirichlet27(
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat, 
+											 unsigned long long numberOfLBnodes, 
 											 bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3009,24 +3009,24 @@ __global__ void QADDirichlet27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3066,33 +3066,33 @@ __global__ void QADDirichlet27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       ////drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -3118,33 +3118,33 @@ __global__ void QADDirichlet27(
       vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -3220,86 +3220,86 @@ __global__ void QADDirichlet27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[  ke   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[  kw   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[  kn   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[  ks   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[  kt   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[  kb   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[ kne  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[ ksw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[ kse  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[ knw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[ kte  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[ kbw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[ kbe  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[ ktw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[ ktn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[ kbs  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[ kbn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[ kts  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirE[  ke   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[  kw   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[  kn   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[  ks   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[  kt   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[  kb   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[ kne  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[ ksw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[ kse  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[ knw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[ kte  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[ kbw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[ kbe  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[ ktw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[ ktn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[ kbs  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[ kbn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[ kts  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
       q = q_dirTNE[ktne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
       q = q_dirBSW[kbsw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
       q = q_dirBNE[kbne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
@@ -3308,24 +3308,24 @@ __global__ void QADDirichlet27(
       q = q_dirBNW[kbnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
       q = q_dirBSE[kbse ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
       q = q_dirTNW[ktnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -3389,131 +3389,131 @@ __global__ void QADBB27( real* DD,
                                    unsigned int* neighborX,
                                    unsigned int* neighborY,
                                    unsigned int* neighborZ,
-                                   unsigned int size_Mat, 
+                                   unsigned long long numberOfLBnodes, 
                                    bool isEvenTimestep)
 {
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    //}
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3534,24 +3534,24 @@ __global__ void QADBB27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3591,33 +3591,33 @@ __global__ void QADBB27( real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[DIR_P00   ])[ke   ];
-      //real f_E    = (D.f[DIR_M00   ])[kw   ];
-      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //real f_B    = (D.f[DIR_00P   ])[kt   ];
-      //real f_T    = (D.f[DIR_00M   ])[kb   ];
-      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_W    = (D.f[DIR_P00])[ke   ];
+      //real f_E    = (D.f[DIR_M00])[kw   ];
+      //real f_S    = (D.f[DIR_0P0])[kn   ];
+      //real f_N    = (D.f[DIR_0M0])[ks   ];
+      //real f_B    = (D.f[DIR_00P])[kt   ];
+      //real f_T    = (D.f[DIR_00M])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1, vx2, vx3, /*drho, feq,*/ q;
       real q;
@@ -3644,33 +3644,33 @@ __global__ void QADBB27( real* DD,
       //vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       //vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -3746,86 +3746,86 @@ __global__ void QADBB27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=f27_E  ;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=f27_W  ;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=f27_N  ;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=f27_S  ;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=f27_T  ;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=f27_B  ;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=f27_NE ;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=f27_SW ;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=f27_SE ;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=f27_NW ;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=f27_TE ;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=f27_BW ;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=f27_BE ;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=f27_TW ;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=f27_TN ;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=f27_BS ;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=f27_BN ;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=f27_TS ;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=f27_E  ;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=f27_W  ;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=f27_N  ;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=f27_S  ;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=f27_T  ;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=f27_B  ;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=f27_NE ;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=f27_SW ;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=f27_SE ;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=f27_NW ;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=f27_TE ;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=f27_BW ;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=f27_BE ;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=f27_TW ;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=f27_TN ;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=f27_BS ;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=f27_BN ;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=f27_TS ;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=f27_TNE;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=f27_BSW;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=f27_BNE;
@@ -3905,91 +3905,91 @@ __global__ void QNoSlipADincomp7(
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat, 
+											 unsigned long long numberOfLBnodes, 
 											 bool isEvenTimestep)
 {
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    //}
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -4009,12 +4009,12 @@ __global__ void QNoSlipADincomp7(
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4046,32 +4046,32 @@ __global__ void QNoSlipADincomp7(
       //unsigned int ktne = KQK;
       //unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[DIR_P00   ])[ke   ];
-      //real f_E    = (D.f[DIR_M00   ])[kw   ];
-      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //real f_B    = (D.f[DIR_00P   ])[kt   ];
-      //real f_T    = (D.f[DIR_00M   ])[kb   ];
-      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //real f_W    = (D.f[DIR_P00])[ke   ];
+      //real f_E    = (D.f[DIR_M00])[kw   ];
+      //real f_S    = (D.f[DIR_0P0])[kn   ];
+      //real f_N    = (D.f[DIR_0M0])[ks   ];
+      //real f_B    = (D.f[DIR_00P])[kt   ];
+      //real f_T    = (D.f[DIR_00M])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP])[kts  ];
+      //real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       //real vx2 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
@@ -4131,23 +4131,23 @@ __global__ void QNoSlipADincomp7(
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////
@@ -4329,131 +4329,131 @@ __global__ void QNoSlipADincomp27(
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat, 
+											 unsigned long long numberOfLBnodes, 
 											 bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4474,24 +4474,24 @@ __global__ void QNoSlipADincomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -4531,65 +4531,65 @@ __global__ void QNoSlipADincomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3 =  ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -4665,63 +4665,63 @@ __global__ void QNoSlipADincomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -4729,24 +4729,24 @@ __global__ void QNoSlipADincomp27(
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
@@ -4811,91 +4811,91 @@ __global__ void QADVeloIncomp7(
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    //}
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -4915,12 +4915,12 @@ __global__ void QADVeloIncomp7(
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4952,32 +4952,32 @@ __global__ void QADVeloIncomp7(
       //unsigned int ktne = KQK;
       //unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[DIR_P00   ])[ke   ];
-      //real f_E    = (D.f[DIR_M00   ])[kw   ];
-      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //real f_B    = (D.f[DIR_00P   ])[kt   ];
-      //real f_T    = (D.f[DIR_00M   ])[kb   ];
-      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //real f_W    = (D.f[DIR_P00])[ke   ];
+      //real f_E    = (D.f[DIR_M00])[kw   ];
+      //real f_S    = (D.f[DIR_0P0])[kn   ];
+      //real f_N    = (D.f[DIR_0M0])[ks   ];
+      //real f_B    = (D.f[DIR_00P])[kt   ];
+      //real f_T    = (D.f[DIR_00M])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP])[kts  ];
+      //real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1_Inflow   = c0o1;
       //real vx2_Inflow   = velo[k];
@@ -5091,23 +5091,23 @@ __global__ void QADVeloIncomp7(
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////
@@ -5289,131 +5289,131 @@ __global__ void QADVeloIncomp27(
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5434,24 +5434,24 @@ __global__ void QADVeloIncomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -5491,65 +5491,65 @@ __global__ void QADVeloIncomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3 = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_W    = (D27.f[DIR_P00])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      //real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -5630,63 +5630,63 @@ __global__ void QADVeloIncomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -5694,24 +5694,24 @@ __global__ void QADVeloIncomp27(
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
@@ -5720,24 +5720,24 @@ __global__ void QADVeloIncomp27(
       q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
       q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
       q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -5801,91 +5801,91 @@ __global__ void QADPressIncomp7( real* DD,
 										   unsigned int* neighborX,
 										   unsigned int* neighborY,
 										   unsigned int* neighborZ,
-										   unsigned int size_Mat, 
+										   unsigned long long numberOfLBnodes, 
 										   bool isEvenTimestep)
 {
   /* Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }*/
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -5905,12 +5905,12 @@ __global__ void QADPressIncomp7( real* DD,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -5945,32 +5945,32 @@ __global__ void QADPressIncomp7( real* DD,
     /*  real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];*/
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];*/
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       //real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
@@ -6035,23 +6035,23 @@ __global__ void QADPressIncomp7( real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////
@@ -6240,131 +6240,131 @@ __global__ void QADPressIncomp27(
 											   unsigned int* neighborX,
 											   unsigned int* neighborY,
 											   unsigned int* neighborZ,
-											   unsigned int size_Mat, 
+											   unsigned long long numberOfLBnodes, 
 											   bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -6385,24 +6385,24 @@ __global__ void QADPressIncomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -6442,65 +6442,65 @@ __global__ void QADPressIncomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1      = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2      = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3      = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_W    = (D27.f[DIR_P00])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      //real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -6581,63 +6581,63 @@ __global__ void QADPressIncomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -6645,24 +6645,24 @@ __global__ void QADPressIncomp27(
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
@@ -6671,24 +6671,24 @@ __global__ void QADPressIncomp27(
       q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
       q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
       q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -6739,131 +6739,131 @@ __global__ void AD_SlipVelDeviceComp(
     uint* neighborX,
     uint* neighborY,
     uint* neighborZ,
-    uint size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep)
 {
     Distributions27 D;
     if (isEvenTimestep)
     {
-        D.f[DIR_P00   ] = &distributions[DIR_P00    * size_Mat];
-        D.f[DIR_M00   ] = &distributions[DIR_M00    * size_Mat];
-        D.f[DIR_0P0   ] = &distributions[DIR_0P0    * size_Mat];
-        D.f[DIR_0M0   ] = &distributions[DIR_0M0    * size_Mat];
-        D.f[DIR_00P   ] = &distributions[DIR_00P    * size_Mat];
-        D.f[DIR_00M   ] = &distributions[DIR_00M    * size_Mat];
-        D.f[DIR_PP0  ] = &distributions[DIR_PP0   * size_Mat];
-        D.f[DIR_MM0  ] = &distributions[DIR_MM0   * size_Mat];
-        D.f[DIR_PM0  ] = &distributions[DIR_PM0   * size_Mat];
-        D.f[DIR_MP0  ] = &distributions[DIR_MP0   * size_Mat];
-        D.f[DIR_P0P  ] = &distributions[DIR_P0P   * size_Mat];
-        D.f[DIR_M0M  ] = &distributions[DIR_M0M   * size_Mat];
-        D.f[DIR_P0M  ] = &distributions[DIR_P0M   * size_Mat];
-        D.f[DIR_M0P  ] = &distributions[DIR_M0P   * size_Mat];
-        D.f[DIR_0PP  ] = &distributions[DIR_0PP   * size_Mat];
-        D.f[DIR_0MM  ] = &distributions[DIR_0MM   * size_Mat];
-        D.f[DIR_0PM  ] = &distributions[DIR_0PM   * size_Mat];
-        D.f[DIR_0MP  ] = &distributions[DIR_0MP   * size_Mat];
-        D.f[DIR_000] = &distributions[DIR_000 * size_Mat];
-        D.f[DIR_PPP ] = &distributions[DIR_PPP  * size_Mat];
-        D.f[DIR_MMP ] = &distributions[DIR_MMP  * size_Mat];
-        D.f[DIR_PMP ] = &distributions[DIR_PMP  * size_Mat];
-        D.f[DIR_MPP ] = &distributions[DIR_MPP  * size_Mat];
-        D.f[DIR_PPM ] = &distributions[DIR_PPM  * size_Mat];
-        D.f[DIR_MMM ] = &distributions[DIR_MMM  * size_Mat];
-        D.f[DIR_PMM ] = &distributions[DIR_PMM  * size_Mat];
-        D.f[DIR_MPM ] = &distributions[DIR_MPM  * size_Mat];
+        D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+        D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+        D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+        D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+        D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+        D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+        D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+        D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+        D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+        D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+        D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+        D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+        D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+        D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+        D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+        D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+        D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+        D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+        D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+        D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+        D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+        D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+        D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+        D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+        D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+        D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+        D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
     }
     else
     {
-        D.f[DIR_M00   ] = &distributions[DIR_P00    * size_Mat];
-        D.f[DIR_P00   ] = &distributions[DIR_M00    * size_Mat];
-        D.f[DIR_0M0   ] = &distributions[DIR_0P0    * size_Mat];
-        D.f[DIR_0P0   ] = &distributions[DIR_0M0    * size_Mat];
-        D.f[DIR_00M   ] = &distributions[DIR_00P    * size_Mat];
-        D.f[DIR_00P   ] = &distributions[DIR_00M    * size_Mat];
-        D.f[DIR_MM0  ] = &distributions[DIR_PP0   * size_Mat];
-        D.f[DIR_PP0  ] = &distributions[DIR_MM0   * size_Mat];
-        D.f[DIR_MP0  ] = &distributions[DIR_PM0   * size_Mat];
-        D.f[DIR_PM0  ] = &distributions[DIR_MP0   * size_Mat];
-        D.f[DIR_M0M  ] = &distributions[DIR_P0P   * size_Mat];
-        D.f[DIR_P0P  ] = &distributions[DIR_M0M   * size_Mat];
-        D.f[DIR_M0P  ] = &distributions[DIR_P0M   * size_Mat];
-        D.f[DIR_P0M  ] = &distributions[DIR_M0P   * size_Mat];
-        D.f[DIR_0MM  ] = &distributions[DIR_0PP   * size_Mat];
-        D.f[DIR_0PP  ] = &distributions[DIR_0MM   * size_Mat];
-        D.f[DIR_0MP  ] = &distributions[DIR_0PM   * size_Mat];
-        D.f[DIR_0PM  ] = &distributions[DIR_0MP   * size_Mat];
-        D.f[DIR_000] = &distributions[DIR_000 * size_Mat];
-        D.f[DIR_PPP ] = &distributions[DIR_MMM  * size_Mat];
-        D.f[DIR_MMP ] = &distributions[DIR_PPM  * size_Mat];
-        D.f[DIR_PMP ] = &distributions[DIR_MPM  * size_Mat];
-        D.f[DIR_MPP ] = &distributions[DIR_PMM  * size_Mat];
-        D.f[DIR_PPM ] = &distributions[DIR_MMP  * size_Mat];
-        D.f[DIR_MMM ] = &distributions[DIR_PPP  * size_Mat];
-        D.f[DIR_PMM ] = &distributions[DIR_MPP  * size_Mat];
-        D.f[DIR_MPM ] = &distributions[DIR_PMP  * size_Mat];
+        D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+        D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+        D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+        D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+        D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+        D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+        D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+        D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+        D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+        D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+        D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+        D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+        D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+        D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+        D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+        D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+        D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+        D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+        D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+        D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+        D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+        D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
+        D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+        D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+        D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+        D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+        D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
     }
     ////////////////////////////////////////////////////////////////////////////////
     Distributions27 DAD;
     if (isEvenTimestep)
     {
-        DAD.f[DIR_P00   ] = &distributionsAD[DIR_P00    * size_Mat];
-        DAD.f[DIR_M00   ] = &distributionsAD[DIR_M00    * size_Mat];
-        DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-        DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-        DAD.f[DIR_00P   ] = &distributionsAD[DIR_00P    * size_Mat];
-        DAD.f[DIR_00M   ] = &distributionsAD[DIR_00M    * size_Mat];
-        DAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-        DAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-        DAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-        DAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-        DAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P   * size_Mat];
-        DAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M   * size_Mat];
-        DAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M   * size_Mat];
-        DAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P   * size_Mat];
-        DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP   * size_Mat];
-        DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM   * size_Mat];
-        DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM   * size_Mat];
-        DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP   * size_Mat];
-        DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-        DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP  * size_Mat];
-        DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP  * size_Mat];
-        DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP  * size_Mat];
-        DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP  * size_Mat];
-        DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM  * size_Mat];
-        DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM  * size_Mat];
-        DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM  * size_Mat];
-        DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM  * size_Mat];
+        DAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+        DAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+        DAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+        DAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+        DAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+        DAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+        DAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+        DAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+        DAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+        DAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+        DAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+        DAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+        DAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+        DAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+        DAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+        DAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+        DAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+        DAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+        DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+        DAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+        DAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+        DAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+        DAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+        DAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+        DAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+        DAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+        DAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
     }
     else
     {
-        DAD.f[DIR_M00   ] = &distributionsAD[DIR_P00    * size_Mat];
-        DAD.f[DIR_P00   ] = &distributionsAD[DIR_M00    * size_Mat];
-        DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-        DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-        DAD.f[DIR_00M   ] = &distributionsAD[DIR_00P    * size_Mat];
-        DAD.f[DIR_00P   ] = &distributionsAD[DIR_00M    * size_Mat];
-        DAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-        DAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-        DAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-        DAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-        DAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P   * size_Mat];
-        DAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M   * size_Mat];
-        DAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M   * size_Mat];
-        DAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P   * size_Mat];
-        DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP   * size_Mat];
-        DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM   * size_Mat];
-        DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM   * size_Mat];
-        DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP   * size_Mat];
-        DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-        DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM  * size_Mat];
-        DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM  * size_Mat];
-        DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM  * size_Mat];
-        DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM  * size_Mat];
-        DAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP  * size_Mat];
-        DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP  * size_Mat];
-        DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP  * size_Mat];
-        DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP  * size_Mat];
+        DAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+        DAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+        DAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+        DAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+        DAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+        DAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+        DAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+        DAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+        DAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+        DAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+        DAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+        DAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+        DAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+        DAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+        DAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+        DAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+        DAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+        DAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+        DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+        DAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+        DAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+        DAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
+        DAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+        DAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+        DAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+        DAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+        DAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
     }
     ////////////////////////////////////////////////////////////////////////////////
     const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -6888,24 +6888,24 @@ __global__ void AD_SlipVelDeviceComp(
             * q_dirBE, * q_dirTW, * q_dirTN, * q_dirBS, * q_dirBN, * q_dirTS,
             * q_dirTNE, * q_dirTSW, * q_dirTSE, * q_dirTNW, * q_dirBNE, * q_dirBSW,
             * q_dirBSE, * q_dirBNW;
-        q_dirE   = &Qarrays[DIR_P00   * numberOfBCnodes];
-        q_dirW   = &Qarrays[DIR_M00   * numberOfBCnodes];
-        q_dirN   = &Qarrays[DIR_0P0   * numberOfBCnodes];
-        q_dirS   = &Qarrays[DIR_0M0   * numberOfBCnodes];
-        q_dirT   = &Qarrays[DIR_00P   * numberOfBCnodes];
-        q_dirB   = &Qarrays[DIR_00M   * numberOfBCnodes];
-        q_dirNE  = &Qarrays[DIR_PP0  * numberOfBCnodes];
-        q_dirSW  = &Qarrays[DIR_MM0  * numberOfBCnodes];
-        q_dirSE  = &Qarrays[DIR_PM0  * numberOfBCnodes];
-        q_dirNW  = &Qarrays[DIR_MP0  * numberOfBCnodes];
-        q_dirTE  = &Qarrays[DIR_P0P  * numberOfBCnodes];
-        q_dirBW  = &Qarrays[DIR_M0M  * numberOfBCnodes];
-        q_dirBE  = &Qarrays[DIR_P0M  * numberOfBCnodes];
-        q_dirTW  = &Qarrays[DIR_M0P  * numberOfBCnodes];
-        q_dirTN  = &Qarrays[DIR_0PP  * numberOfBCnodes];
-        q_dirBS  = &Qarrays[DIR_0MM  * numberOfBCnodes];
-        q_dirBN  = &Qarrays[DIR_0PM  * numberOfBCnodes];
-        q_dirTS  = &Qarrays[DIR_0MP  * numberOfBCnodes];
+        q_dirE   = &Qarrays[DIR_P00 * numberOfBCnodes];
+        q_dirW   = &Qarrays[DIR_M00 * numberOfBCnodes];
+        q_dirN   = &Qarrays[DIR_0P0 * numberOfBCnodes];
+        q_dirS   = &Qarrays[DIR_0M0 * numberOfBCnodes];
+        q_dirT   = &Qarrays[DIR_00P * numberOfBCnodes];
+        q_dirB   = &Qarrays[DIR_00M * numberOfBCnodes];
+        q_dirNE  = &Qarrays[DIR_PP0 * numberOfBCnodes];
+        q_dirSW  = &Qarrays[DIR_MM0 * numberOfBCnodes];
+        q_dirSE  = &Qarrays[DIR_PM0 * numberOfBCnodes];
+        q_dirNW  = &Qarrays[DIR_MP0 * numberOfBCnodes];
+        q_dirTE  = &Qarrays[DIR_P0P * numberOfBCnodes];
+        q_dirBW  = &Qarrays[DIR_M0M * numberOfBCnodes];
+        q_dirBE  = &Qarrays[DIR_P0M * numberOfBCnodes];
+        q_dirTW  = &Qarrays[DIR_M0P * numberOfBCnodes];
+        q_dirTN  = &Qarrays[DIR_0PP * numberOfBCnodes];
+        q_dirBS  = &Qarrays[DIR_0MM * numberOfBCnodes];
+        q_dirBN  = &Qarrays[DIR_0PM * numberOfBCnodes];
+        q_dirTS  = &Qarrays[DIR_0MP * numberOfBCnodes];
         q_dirTNE = &Qarrays[DIR_PPP * numberOfBCnodes];
         q_dirTSW = &Qarrays[DIR_MMP * numberOfBCnodes];
         q_dirTSE = &Qarrays[DIR_PMP * numberOfBCnodes];
@@ -7025,63 +7025,63 @@ __global__ void AD_SlipVelDeviceComp(
         //////////////////////////////////////////////////////////////////////////
         if (!isEvenTimestep)
         {
-            DAD.f[DIR_P00   ] = &distributionsAD[DIR_P00    * size_Mat];
-            DAD.f[DIR_M00   ] = &distributionsAD[DIR_M00    * size_Mat];
-            DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-            DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-            DAD.f[DIR_00P   ] = &distributionsAD[DIR_00P    * size_Mat];
-            DAD.f[DIR_00M   ] = &distributionsAD[DIR_00M    * size_Mat];
-            DAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-            DAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-            DAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-            DAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-            DAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P   * size_Mat];
-            DAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M   * size_Mat];
-            DAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M   * size_Mat];
-            DAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P   * size_Mat];
-            DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP   * size_Mat];
-            DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM   * size_Mat];
-            DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM   * size_Mat];
-            DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP   * size_Mat];
-            DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-            DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP  * size_Mat];
-            DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP  * size_Mat];
-            DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP  * size_Mat];
-            DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP  * size_Mat];
-            DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM  * size_Mat];
-            DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM  * size_Mat];
-            DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM  * size_Mat];
-            DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM  * size_Mat];
+            DAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+            DAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+            DAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+            DAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+            DAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+            DAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+            DAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+            DAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+            DAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+            DAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+            DAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+            DAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+            DAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+            DAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+            DAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+            DAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+            DAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+            DAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+            DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+            DAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+            DAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+            DAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+            DAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+            DAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+            DAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+            DAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+            DAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
         }
         else
         {
-            DAD.f[DIR_M00   ] = &distributionsAD[DIR_P00    * size_Mat];
-            DAD.f[DIR_P00   ] = &distributionsAD[DIR_M00    * size_Mat];
-            DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-            DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-            DAD.f[DIR_00M   ] = &distributionsAD[DIR_00P    * size_Mat];
-            DAD.f[DIR_00P   ] = &distributionsAD[DIR_00M    * size_Mat];
-            DAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-            DAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-            DAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-            DAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-            DAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P   * size_Mat];
-            DAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M   * size_Mat];
-            DAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M   * size_Mat];
-            DAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P   * size_Mat];
-            DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP   * size_Mat];
-            DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM   * size_Mat];
-            DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM   * size_Mat];
-            DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP   * size_Mat];
-            DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-            DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM  * size_Mat];
-            DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM  * size_Mat];
-            DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM  * size_Mat];
-            DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM  * size_Mat];
-            DAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP  * size_Mat];
-            DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP  * size_Mat];
-            DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP  * size_Mat];
-            DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP  * size_Mat];
+            DAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+            DAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+            DAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+            DAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+            DAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+            DAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+            DAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+            DAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+            DAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+            DAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+            DAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+            DAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+            DAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+            DAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+            DAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+            DAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+            DAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+            DAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+            DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+            DAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+            DAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+            DAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
+            DAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+            DAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+            DAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+            DAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+            DAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
         }
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         real concentration =
@@ -7115,24 +7115,24 @@ __global__ void AD_SlipVelDeviceComp(
         real jTan3 = jx3 - NormJ * NormZ;
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        q = q_dirE[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M00  ])[kw  ] = calcDistributionBC_AD(q, c2o27,   vx1,         cu_sq, f_E,   f_W,   omegaDiffusivity,        jTan1,       concentration); }
-        q = q_dirW[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P00  ])[ke  ] = calcDistributionBC_AD(q, c2o27,  -vx1,         cu_sq, f_W,   f_E,   omegaDiffusivity,       -jTan1,       concentration); }
-        q = q_dirN[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0M0  ])[ks  ] = calcDistributionBC_AD(q, c2o27,   vx2,         cu_sq, f_N,   f_S,   omegaDiffusivity,        jTan2,       concentration); }
-        q = q_dirS[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0P0  ])[kn  ] = calcDistributionBC_AD(q, c2o27,  -vx2,         cu_sq, f_S,   f_N,   omegaDiffusivity,       -jTan2,       concentration); }
-        q = q_dirT[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00M  ])[kb  ] = calcDistributionBC_AD(q, c2o27,   vx3,         cu_sq, f_T,   f_B,   omegaDiffusivity,        jTan3,       concentration); }
-        q = q_dirB[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00P  ])[kt  ] = calcDistributionBC_AD(q, c2o27,  -vx3,         cu_sq, f_B,   f_T,   omegaDiffusivity,       -jTan3,       concentration); }
-        q = q_dirNE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MM0 ])[ksw ] = calcDistributionBC_AD(q, c1o54,   vx1+vx2,     cu_sq, f_NE,  f_SW,  omegaDiffusivity,  jTan1+jTan2,       concentration); }
-        q = q_dirSW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PP0 ])[kne ] = calcDistributionBC_AD(q, c1o54,  -vx1-vx2,     cu_sq, f_SW,  f_NE,  omegaDiffusivity, -jTan1-jTan2,       concentration); }
-        q = q_dirSE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MP0 ])[knw ] = calcDistributionBC_AD(q, c1o54,   vx1-vx2,     cu_sq, f_SE,  f_NW,  omegaDiffusivity,  jTan1-jTan2,       concentration); }
-        q = q_dirNW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PM0 ])[kse ] = calcDistributionBC_AD(q, c1o54,  -vx1+vx2,     cu_sq, f_NW,  f_SE,  omegaDiffusivity, -jTan1+jTan2,       concentration); }
-        q = q_dirTE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0M ])[kbw ] = calcDistributionBC_AD(q, c1o54,   vx1    +vx3, cu_sq, f_TE,  f_BW,  omegaDiffusivity,  jTan1      +jTan3, concentration); }
-        q = q_dirBW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0P ])[kte ] = calcDistributionBC_AD(q, c1o54,  -vx1    -vx3, cu_sq, f_BW,  f_TE,  omegaDiffusivity, -jTan1      -jTan3, concentration); }
-        q = q_dirBE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0P ])[ktw ] = calcDistributionBC_AD(q, c1o54,   vx1    -vx3, cu_sq, f_BE,  f_TW,  omegaDiffusivity,  jTan1      -jTan3, concentration); }
-        q = q_dirTW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0M ])[kbe ] = calcDistributionBC_AD(q, c1o54,  -vx1    +vx3, cu_sq, f_TW,  f_BE,  omegaDiffusivity, -jTan1      +jTan3, concentration); }
-        q = q_dirTN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MM ])[kbs ] = calcDistributionBC_AD(q, c1o54,       vx2+vx3, cu_sq, f_TN,  f_BS,  omegaDiffusivity,        jTan2+jTan3, concentration); }
-        q = q_dirBS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PP ])[ktn ] = calcDistributionBC_AD(q, c1o54,      -vx2-vx3, cu_sq, f_BS,  f_TN,  omegaDiffusivity,       -jTan2-jTan3, concentration); }
-        q = q_dirBN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MP ])[kts ] = calcDistributionBC_AD(q, c1o54,       vx2-vx3, cu_sq, f_BN,  f_TS,  omegaDiffusivity,        jTan2-jTan3, concentration); }
-        q = q_dirTS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PM ])[kbn ] = calcDistributionBC_AD(q, c1o54,      -vx2+vx3, cu_sq, f_TS,  f_BN,  omegaDiffusivity,       -jTan2+jTan3, concentration); }
+        q = q_dirE[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M00])[kw  ] = calcDistributionBC_AD(q, c2o27,   vx1,         cu_sq, f_E,   f_W,   omegaDiffusivity,        jTan1,       concentration); }
+        q = q_dirW[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P00])[ke  ] = calcDistributionBC_AD(q, c2o27,  -vx1,         cu_sq, f_W,   f_E,   omegaDiffusivity,       -jTan1,       concentration); }
+        q = q_dirN[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0M0])[ks  ] = calcDistributionBC_AD(q, c2o27,   vx2,         cu_sq, f_N,   f_S,   omegaDiffusivity,        jTan2,       concentration); }
+        q = q_dirS[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0P0])[kn  ] = calcDistributionBC_AD(q, c2o27,  -vx2,         cu_sq, f_S,   f_N,   omegaDiffusivity,       -jTan2,       concentration); }
+        q = q_dirT[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00M])[kb  ] = calcDistributionBC_AD(q, c2o27,   vx3,         cu_sq, f_T,   f_B,   omegaDiffusivity,        jTan3,       concentration); }
+        q = q_dirB[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00P])[kt  ] = calcDistributionBC_AD(q, c2o27,  -vx3,         cu_sq, f_B,   f_T,   omegaDiffusivity,       -jTan3,       concentration); }
+        q = q_dirNE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MM0])[ksw ] = calcDistributionBC_AD(q, c1o54,   vx1+vx2,     cu_sq, f_NE,  f_SW,  omegaDiffusivity,  jTan1+jTan2,       concentration); }
+        q = q_dirSW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PP0])[kne ] = calcDistributionBC_AD(q, c1o54,  -vx1-vx2,     cu_sq, f_SW,  f_NE,  omegaDiffusivity, -jTan1-jTan2,       concentration); }
+        q = q_dirSE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MP0])[knw ] = calcDistributionBC_AD(q, c1o54,   vx1-vx2,     cu_sq, f_SE,  f_NW,  omegaDiffusivity,  jTan1-jTan2,       concentration); }
+        q = q_dirNW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PM0])[kse ] = calcDistributionBC_AD(q, c1o54,  -vx1+vx2,     cu_sq, f_NW,  f_SE,  omegaDiffusivity, -jTan1+jTan2,       concentration); }
+        q = q_dirTE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0M])[kbw ] = calcDistributionBC_AD(q, c1o54,   vx1    +vx3, cu_sq, f_TE,  f_BW,  omegaDiffusivity,  jTan1      +jTan3, concentration); }
+        q = q_dirBW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0P])[kte ] = calcDistributionBC_AD(q, c1o54,  -vx1    -vx3, cu_sq, f_BW,  f_TE,  omegaDiffusivity, -jTan1      -jTan3, concentration); }
+        q = q_dirBE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0P])[ktw ] = calcDistributionBC_AD(q, c1o54,   vx1    -vx3, cu_sq, f_BE,  f_TW,  omegaDiffusivity,  jTan1      -jTan3, concentration); }
+        q = q_dirTW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0M])[kbe ] = calcDistributionBC_AD(q, c1o54,  -vx1    +vx3, cu_sq, f_TW,  f_BE,  omegaDiffusivity, -jTan1      +jTan3, concentration); }
+        q = q_dirTN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MM])[kbs ] = calcDistributionBC_AD(q, c1o54,       vx2+vx3, cu_sq, f_TN,  f_BS,  omegaDiffusivity,        jTan2+jTan3, concentration); }
+        q = q_dirBS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PP])[ktn ] = calcDistributionBC_AD(q, c1o54,      -vx2-vx3, cu_sq, f_BS,  f_TN,  omegaDiffusivity,       -jTan2-jTan3, concentration); }
+        q = q_dirBN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MP])[kts ] = calcDistributionBC_AD(q, c1o54,       vx2-vx3, cu_sq, f_BN,  f_TS,  omegaDiffusivity,        jTan2-jTan3, concentration); }
+        q = q_dirTS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PM])[kbn ] = calcDistributionBC_AD(q, c1o54,      -vx2+vx3, cu_sq, f_TS,  f_BN,  omegaDiffusivity,       -jTan2+jTan3, concentration); }
         q = q_dirTNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMM])[kbsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2+vx3, cu_sq, f_TNE, f_BSW, omegaDiffusivity,  jTan1+jTan2+jTan3, concentration); }
         q = q_dirBSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PPP])[ktne] = calcDistributionBC_AD(q, c1o216, -vx1-vx2-vx3, cu_sq, f_BSW, f_TNE, omegaDiffusivity, -jTan1-jTan2-jTan3, concentration); }
         q = q_dirBNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMP])[ktsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2-vx3, cu_sq, f_BNE, f_TSW, omegaDiffusivity,  jTan1+jTan2-jTan3, concentration); }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
index 1ef111330c0d4293c14d66893847689ad8fac77f..8d02f4e1c110fc82b65adda4db67976f29796d07 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
@@ -14,69 +14,69 @@ __global__ void CalcCP27(real* DD,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -124,20 +124,20 @@ __global__ void CalcCP27(real* DD,
 		////////////////////////////////////////////////////////////////////////////////
 		double PressCP;
 
-		PressCP  =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                     (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                     (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                     (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                     (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                     (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                     (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                     (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                     (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+		PressCP  =   (D.f[DIR_P00])[ke  ]+ (D.f[DIR_M00])[kw  ]+ 
+                     (D.f[DIR_0P0])[kn  ]+ (D.f[DIR_0M0])[ks  ]+
+                     (D.f[DIR_00P])[kt  ]+ (D.f[DIR_00M])[kb  ]+
+                     (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+
+                     (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+
+                     (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+
+                     (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+
+                     (D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+
+                     (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+
                      (D.f[DIR_000])[kzero]+ 
-                     (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                     (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                     (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                     (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
+                     (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ 
+                     (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ 
+                     (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ 
+                     (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw];
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		cpPress[k] = PressCP;
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
index ce8fe68cd6a2e8f09f150cb0ccdec502a6278b50..c41751dc1b5cea53983d94d9cc7c3c75c8a84101 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
@@ -16,70 +16,70 @@ __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
-														unsigned int size_Mat,
+														unsigned long long numberOfLBnodes,
 														real* DD,
 														bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -92,7 +92,7 @@ __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k < size_Mat)
+   if(k < numberOfLBnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
@@ -125,33 +125,33 @@ __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
       unsigned int kbsw = neighborZ[ksw];
       //////////////////////////////////////////////////////////////////////////
       real        f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,/*f_ZERO,*/f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-	  f_E    = (D.f[DIR_P00   ])[ke   ];
-	  f_W    = (D.f[DIR_M00   ])[kw   ];
-	  f_N    = (D.f[DIR_0P0   ])[kn   ];
-	  f_S    = (D.f[DIR_0M0   ])[ks   ];
-	  f_T    = (D.f[DIR_00P   ])[kt   ];
-	  f_B    = (D.f[DIR_00M   ])[kb   ];
-	  f_NE   = (D.f[DIR_PP0  ])[kne  ];
-	  f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-	  f_SE   = (D.f[DIR_PM0  ])[kse  ];
-	  f_NW   = (D.f[DIR_MP0  ])[knw  ];
-	  f_TE   = (D.f[DIR_P0P  ])[kte  ];
-	  f_BW   = (D.f[DIR_M0M  ])[kbw  ];
-	  f_BE   = (D.f[DIR_P0M  ])[kbe  ];
-	  f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-	  f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-	  f_BS   = (D.f[DIR_0MM  ])[kbs  ];
-	  f_BN   = (D.f[DIR_0PM  ])[kbn  ];
-	  f_TS   = (D.f[DIR_0MP  ])[kts  ];
+	  f_E    = (D.f[DIR_P00])[ke   ];
+	  f_W    = (D.f[DIR_M00])[kw   ];
+	  f_N    = (D.f[DIR_0P0])[kn   ];
+	  f_S    = (D.f[DIR_0M0])[ks   ];
+	  f_T    = (D.f[DIR_00P])[kt   ];
+	  f_B    = (D.f[DIR_00M])[kb   ];
+	  f_NE   = (D.f[DIR_PP0])[kne  ];
+	  f_SW   = (D.f[DIR_MM0])[ksw  ];
+	  f_SE   = (D.f[DIR_PM0])[kse  ];
+	  f_NW   = (D.f[DIR_MP0])[knw  ];
+	  f_TE   = (D.f[DIR_P0P])[kte  ];
+	  f_BW   = (D.f[DIR_M0M])[kbw  ];
+	  f_BE   = (D.f[DIR_P0M])[kbe  ];
+	  f_TW   = (D.f[DIR_M0P])[ktw  ];
+	  f_TN   = (D.f[DIR_0PP])[ktn  ];
+	  f_BS   = (D.f[DIR_0MM])[kbs  ];
+	  f_BN   = (D.f[DIR_0PM])[kbn  ];
+	  f_TS   = (D.f[DIR_0MP])[kts  ];
 	  //f_ZERO = (D.f[DIR_000])[kzero];
-	  f_TNE  = (D.f[DIR_PPP ])[ktne ];
-	  f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-	  f_TSE  = (D.f[DIR_PMP ])[ktse ];
-	  f_TNW  = (D.f[DIR_MPP ])[ktnw ];
-	  f_BNE  = (D.f[DIR_PPM ])[kbne ];
-	  f_BSW  = (D.f[DIR_MMM ])[kbsw ];
-	  f_BSE  = (D.f[DIR_PMM ])[kbse ];
-	  f_BNW  = (D.f[DIR_MPM ])[kbnw ];
+	  f_TNE  = (D.f[DIR_PPP])[ktne ];
+	  f_TSW  = (D.f[DIR_MMP])[ktsw ];
+	  f_TSE  = (D.f[DIR_PMP])[ktse ];
+	  f_TNW  = (D.f[DIR_MPP])[ktnw ];
+	  f_BNE  = (D.f[DIR_PPM])[kbne ];
+	  f_BSW  = (D.f[DIR_MMM])[kbsw ];
+	  f_BSE  = (D.f[DIR_PMM])[kbse ];
+	  f_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
 	  real vx1, vx2, vx3;
       kxyFromfcNEQ[k]       = c0o1;
@@ -215,70 +215,70 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
 													unsigned int* neighborZ,
-													unsigned int size_Mat,
+													unsigned long long numberOfLBnodes,
 													real* DD,
 													bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -291,7 +291,7 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k < size_Mat)
+   if(k < numberOfLBnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
@@ -325,33 +325,33 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
       //////////////////////////////////////////////////////////////////////////
       real f_ZERO;
       real        f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-	  f_E    = (D.f[DIR_P00   ])[ke   ];
-	  f_W    = (D.f[DIR_M00   ])[kw   ];
-	  f_N    = (D.f[DIR_0P0   ])[kn   ];
-	  f_S    = (D.f[DIR_0M0   ])[ks   ];
-	  f_T    = (D.f[DIR_00P   ])[kt   ];
-	  f_B    = (D.f[DIR_00M   ])[kb   ];
-	  f_NE   = (D.f[DIR_PP0  ])[kne  ];
-	  f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-	  f_SE   = (D.f[DIR_PM0  ])[kse  ];
-	  f_NW   = (D.f[DIR_MP0  ])[knw  ];
-	  f_TE   = (D.f[DIR_P0P  ])[kte  ];
-	  f_BW   = (D.f[DIR_M0M  ])[kbw  ];
-	  f_BE   = (D.f[DIR_P0M  ])[kbe  ];
-	  f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-	  f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-	  f_BS   = (D.f[DIR_0MM  ])[kbs  ];
-	  f_BN   = (D.f[DIR_0PM  ])[kbn  ];
-	  f_TS   = (D.f[DIR_0MP  ])[kts  ];
+	  f_E    = (D.f[DIR_P00])[ke   ];
+	  f_W    = (D.f[DIR_M00])[kw   ];
+	  f_N    = (D.f[DIR_0P0])[kn   ];
+	  f_S    = (D.f[DIR_0M0])[ks   ];
+	  f_T    = (D.f[DIR_00P])[kt   ];
+	  f_B    = (D.f[DIR_00M])[kb   ];
+	  f_NE   = (D.f[DIR_PP0])[kne  ];
+	  f_SW   = (D.f[DIR_MM0])[ksw  ];
+	  f_SE   = (D.f[DIR_PM0])[kse  ];
+	  f_NW   = (D.f[DIR_MP0])[knw  ];
+	  f_TE   = (D.f[DIR_P0P])[kte  ];
+	  f_BW   = (D.f[DIR_M0M])[kbw  ];
+	  f_BE   = (D.f[DIR_P0M])[kbe  ];
+	  f_TW   = (D.f[DIR_M0P])[ktw  ];
+	  f_TN   = (D.f[DIR_0PP])[ktn  ];
+	  f_BS   = (D.f[DIR_0MM])[kbs  ];
+	  f_BN   = (D.f[DIR_0PM])[kbn  ];
+	  f_TS   = (D.f[DIR_0MP])[kts  ];
 	  f_ZERO = (D.f[DIR_000])[kzero];
-	  f_TNE  = (D.f[DIR_PPP ])[ktne ];
-	  f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-	  f_TSE  = (D.f[DIR_PMP ])[ktse ];
-	  f_TNW  = (D.f[DIR_MPP ])[ktnw ];
-	  f_BNE  = (D.f[DIR_PPM ])[kbne ];
-	  f_BSW  = (D.f[DIR_MMM ])[kbsw ];
-	  f_BSE  = (D.f[DIR_PMM ])[kbse ];
-	  f_BNW  = (D.f[DIR_MPM ])[kbnw ];
+	  f_TNE  = (D.f[DIR_PPP])[ktne ];
+	  f_TSW  = (D.f[DIR_MMP])[ktsw ];
+	  f_TSE  = (D.f[DIR_PMP])[ktse ];
+	  f_TNW  = (D.f[DIR_MPP])[ktnw ];
+	  f_BNE  = (D.f[DIR_PPM])[kbne ];
+	  f_BSW  = (D.f[DIR_MMM])[kbsw ];
+	  f_BSE  = (D.f[DIR_PMM])[kbse ];
+	  f_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
 	  real drho;
 	  real vx1, vx2, vx3, rho;
@@ -423,7 +423,7 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -437,7 +437,7 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -448,63 +448,63 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -517,33 +517,33 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
 						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
@@ -857,7 +857,7 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													unsigned int* neighborY,
 													unsigned int* neighborZ,
 													real* DDStart,
-													int size_Mat,
+													unsigned long long numberOfLBnodes,
 													bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -871,7 +871,7 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -882,63 +882,63 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -951,33 +951,33 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -1298,7 +1298,7 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															unsigned int* neighborY,
 															unsigned int* neighborZ,
 															real* DDStart,
-															int size_Mat,
+															unsigned long long numberOfLBnodes,
 															bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -1312,7 +1312,7 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -1323,63 +1323,63 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1392,33 +1392,33 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
 						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
@@ -1752,7 +1752,7 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -1766,7 +1766,7 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -1777,63 +1777,63 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1846,33 +1846,33 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
index d246f39a030b6df0b249aee17f37b7d5258ff00d..ad5a05b12a1b3ae2541e36ccffae4635fccfe62a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
@@ -45,7 +45,7 @@ __global__ void CalcConc27(
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
-	uint size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* distributionsAD,
 	bool isEvenTimestep)
 {
@@ -67,7 +67,7 @@ __global__ void CalcConc27(
 
    //////////////////////////////////////////////////////////////////////////
    // run for all indices in size_Mat and fluid nodes
-   if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID))
+   if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID))
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -76,63 +76,63 @@ __global__ void CalcConc27(
       Distributions27 distAD;
       if (isEvenTimestep)
       {
-         distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
-         distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
-         distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-         distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-         distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
-         distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
-         distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-         distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-         distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-         distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-         distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
-         distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
-         distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
-         distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
-         distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
-         distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
-         distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
-         distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
-         distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-         distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
-         distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
-         distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
-         distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
-         distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
-         distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
-         distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
-         distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
+         distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+         distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+         distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+         distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+         distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+         distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+         distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+         distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+         distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+         distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+         distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+         distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+         distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+         distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+         distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+         distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+         distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+         distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+         distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+         distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+         distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+         distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+         distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+         distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+         distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+         distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+         distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
-         distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
-         distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-         distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-         distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
-         distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
-         distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-         distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-         distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-         distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-         distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
-         distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
-         distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
-         distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
-         distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
-         distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
-         distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
-         distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
-         distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-         distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
-         distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
-         distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
-         distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
-         distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
-         distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
-         distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
-         distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
+         distAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+         distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+         distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+         distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+         distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+         distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+         distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+         distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+         distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+         distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+         distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+         distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+         distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+         distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+         distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+         distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+         distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+         distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+         distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+         distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+         distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+         distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
+         distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+         distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+         distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+         distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+         distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
       }
 	  ////////////////////////////////////////////////////////////////////////////////
 	  //! - Set neighbor indices (necessary for indirect addressing)
@@ -166,33 +166,33 @@ __global__ void CalcConc27(
 	  ////////////////////////////////////////////////////////////////////////////////
 	  //! - Set local distributions
 	  //!
-	  real mfcbb = (distAD.f[DIR_P00   ])[ke  ];
-	  real mfabb = (distAD.f[DIR_M00   ])[kw  ];
-	  real mfbcb = (distAD.f[DIR_0P0   ])[kn  ];
-	  real mfbab = (distAD.f[DIR_0M0   ])[ks  ];
-	  real mfbbc = (distAD.f[DIR_00P   ])[kt  ];
-	  real mfbba = (distAD.f[DIR_00M   ])[kb  ];
-	  real mfccb = (distAD.f[DIR_PP0  ])[kne ];
-	  real mfaab = (distAD.f[DIR_MM0  ])[ksw ];
-	  real mfcab = (distAD.f[DIR_PM0  ])[kse ];
-	  real mfacb = (distAD.f[DIR_MP0  ])[knw ];
-	  real mfcbc = (distAD.f[DIR_P0P  ])[kte ];
-	  real mfaba = (distAD.f[DIR_M0M  ])[kbw ];
-	  real mfcba = (distAD.f[DIR_P0M  ])[kbe ];
-	  real mfabc = (distAD.f[DIR_M0P  ])[ktw ];
-	  real mfbcc = (distAD.f[DIR_0PP  ])[ktn ];
-	  real mfbaa = (distAD.f[DIR_0MM  ])[kbs ];
-	  real mfbca = (distAD.f[DIR_0PM  ])[kbn ];
-	  real mfbac = (distAD.f[DIR_0MP  ])[kts ];
+	  real mfcbb = (distAD.f[DIR_P00])[ke  ];
+	  real mfabb = (distAD.f[DIR_M00])[kw  ];
+	  real mfbcb = (distAD.f[DIR_0P0])[kn  ];
+	  real mfbab = (distAD.f[DIR_0M0])[ks  ];
+	  real mfbbc = (distAD.f[DIR_00P])[kt  ];
+	  real mfbba = (distAD.f[DIR_00M])[kb  ];
+	  real mfccb = (distAD.f[DIR_PP0])[kne ];
+	  real mfaab = (distAD.f[DIR_MM0])[ksw ];
+	  real mfcab = (distAD.f[DIR_PM0])[kse ];
+	  real mfacb = (distAD.f[DIR_MP0])[knw ];
+	  real mfcbc = (distAD.f[DIR_P0P])[kte ];
+	  real mfaba = (distAD.f[DIR_M0M])[kbw ];
+	  real mfcba = (distAD.f[DIR_P0M])[kbe ];
+	  real mfabc = (distAD.f[DIR_M0P])[ktw ];
+	  real mfbcc = (distAD.f[DIR_0PP])[ktn ];
+	  real mfbaa = (distAD.f[DIR_0MM])[kbs ];
+	  real mfbca = (distAD.f[DIR_0PM])[kbn ];
+	  real mfbac = (distAD.f[DIR_0MP])[kts ];
 	  real mfbbb = (distAD.f[DIR_000])[k   ];
-	  real mfccc = (distAD.f[DIR_PPP ])[ktne];
-	  real mfaac = (distAD.f[DIR_MMP ])[ktsw];
-	  real mfcac = (distAD.f[DIR_PMP ])[ktse];
-	  real mfacc = (distAD.f[DIR_MPP ])[ktnw];
-	  real mfcca = (distAD.f[DIR_PPM ])[kbne];
-	  real mfaaa = (distAD.f[DIR_MMM ])[kbsw];
-	  real mfcaa = (distAD.f[DIR_PMM ])[kbse];
-	  real mfaca = (distAD.f[DIR_MPM ])[kbnw];
+	  real mfccc = (distAD.f[DIR_PPP])[ktne];
+	  real mfaac = (distAD.f[DIR_MMP])[ktsw];
+	  real mfcac = (distAD.f[DIR_PMP])[ktse];
+	  real mfacc = (distAD.f[DIR_MPP])[ktnw];
+	  real mfcca = (distAD.f[DIR_PPM])[kbne];
+	  real mfaaa = (distAD.f[DIR_MMM])[kbsw];
+	  real mfcaa = (distAD.f[DIR_PMM])[kbse];
+	  real mfaca = (distAD.f[DIR_MPM])[kbnw];
       //////////////////////////////////////////////////////////////////////////
 	  //! - Calculate concentration using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
 	  //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -229,30 +229,30 @@ __global__ void CalcConc7( real* Conc,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD7,
                                           bool isEvenTimestep)
 {
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    } 
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -265,7 +265,7 @@ __global__ void CalcConc7( real* Conc,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
@@ -327,63 +327,63 @@ __global__ void CalcConc7( real* Conc,
 //    Distributions27 D27;
 //    if (isEvenTimestep==true)
 //    {
-//       D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-//       D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-//       D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-//       D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-//       D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-//       D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-//       D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-//       D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-//       D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-//       D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-//       D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-//       D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-//       D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-//       D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-//       D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-//       D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-//       D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-//       D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-//       D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//       D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-//       D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-//       D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-//       D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-//       D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-//       D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-//       D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-//       D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+//       D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat];
+//       D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat];
+//       D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat];
+//       D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat];
+//       D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat];
+//       D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat];
+//       D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat];
+//       D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat];
+//       D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat];
+//       D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat];
+//       D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat];
+//       D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat];
+//       D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat];
+//       D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat];
+//       D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat];
+//       D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat];
+//       D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat];
+//       D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat];
+//       D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//       D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat];
+//       D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat];
+//       D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat];
+//       D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat];
+//       D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat];
+//       D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat];
+//       D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat];
+//       D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat];
 //    }
 //    else
 //    {
-//       D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-//       D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-//       D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-//       D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-//       D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-//       D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-//       D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-//       D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-//       D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-//       D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-//       D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-//       D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-//       D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-//       D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-//       D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-//       D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-//       D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-//       D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-//       D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//       D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-//       D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-//       D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
-//       D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-//       D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-//       D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-//       D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-//       D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+//       D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat];
+//       D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat];
+//       D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat];
+//       D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat];
+//       D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat];
+//       D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat];
+//       D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat];
+//       D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat];
+//       D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat];
+//       D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat];
+//       D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat];
+//       D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat];
+//       D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat];
+//       D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat];
+//       D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat];
+//       D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat];
+//       D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat];
+//       D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat];
+//       D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//       D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat];
+//       D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat];
+//       D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat];
+//       D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat];
+//       D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat];
+//       D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat];
+//       D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat];
+//       D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat];
 //    }
 //    ////////////////////////////////////////////////////////////////////////////////
 //    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -432,20 +432,20 @@ __global__ void CalcConc7( real* Conc,
 
 //       if(geoD[k] == GEO_FLUID)
 //       {
-//          Conc[k]    =   (D27.f[DIR_P00   ])[ke  ]+ (D27.f[DIR_M00   ])[kw  ]+ 
-//                         (D27.f[DIR_0P0   ])[kn  ]+ (D27.f[DIR_0M0   ])[ks  ]+
-//                         (D27.f[DIR_00P   ])[kt  ]+ (D27.f[DIR_00M   ])[kb  ]+
-//                         (D27.f[DIR_PP0  ])[kne ]+ (D27.f[DIR_MM0  ])[ksw ]+
-//                         (D27.f[DIR_PM0  ])[kse ]+ (D27.f[DIR_MP0  ])[knw ]+
-//                         (D27.f[DIR_P0P  ])[kte ]+ (D27.f[DIR_M0M  ])[kbw ]+
-//                         (D27.f[DIR_P0M  ])[kbe ]+ (D27.f[DIR_M0P  ])[ktw ]+
-//                         (D27.f[DIR_0PP  ])[ktn ]+ (D27.f[DIR_0MM  ])[kbs ]+
-//                         (D27.f[DIR_0PM  ])[kbn ]+ (D27.f[DIR_0MP  ])[kts ]+
+//          Conc[k]    =   (D27.f[DIR_P00])[ke  ]+ (D27.f[DIR_M00])[kw  ]+ 
+//                         (D27.f[DIR_0P0])[kn  ]+ (D27.f[DIR_0M0])[ks  ]+
+//                         (D27.f[DIR_00P])[kt  ]+ (D27.f[DIR_00M])[kb  ]+
+//                         (D27.f[DIR_PP0])[kne ]+ (D27.f[DIR_MM0])[ksw ]+
+//                         (D27.f[DIR_PM0])[kse ]+ (D27.f[DIR_MP0])[knw ]+
+//                         (D27.f[DIR_P0P])[kte ]+ (D27.f[DIR_M0M])[kbw ]+
+//                         (D27.f[DIR_P0M])[kbe ]+ (D27.f[DIR_M0P])[ktw ]+
+//                         (D27.f[DIR_0PP])[ktn ]+ (D27.f[DIR_0MM])[kbs ]+
+//                         (D27.f[DIR_0PM])[kbn ]+ (D27.f[DIR_0MP])[kts ]+
 //                         (D27.f[DIR_000])[kzero]+ 
-//                         (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+
-//                         (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+
-//                         (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+
-//                         (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM ])[kbnw];
+//                         (D27.f[DIR_PPP])[ktne]+ (D27.f[DIR_MMP])[ktsw]+
+//                         (D27.f[DIR_PMP])[ktse]+ (D27.f[DIR_MPP])[ktnw]+
+//                         (D27.f[DIR_PPM])[kbne]+ (D27.f[DIR_MMM])[kbsw]+
+//                         (D27.f[DIR_PMM])[kbse]+ (D27.f[DIR_MPM])[kbnw];
 //       }
 //    }   
 // }
@@ -476,30 +476,30 @@ __global__ void GetPlaneConc7(real* Conc,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat,
+											unsigned long long numberOfLBnodes,
 											real* DD7,
 											bool isEvenTimestep)
 {
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    } 
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -581,70 +581,70 @@ __global__ void GetPlaneConc27(real* Conc,
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
+											 unsigned long long numberOfLBnodes,
 											 real* DD27,
 											 bool isEvenTimestep)
 {
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -693,20 +693,20 @@ __global__ void GetPlaneConc27(real* Conc,
 
       if(geoD[k] == GEO_FLUID)
       {
-         Conc[k]    =   (D27.f[DIR_P00   ])[ke  ]+ (D27.f[DIR_M00   ])[kw  ]+ 
-                        (D27.f[DIR_0P0   ])[kn  ]+ (D27.f[DIR_0M0   ])[ks  ]+
-                        (D27.f[DIR_00P   ])[kt  ]+ (D27.f[DIR_00M   ])[kb  ]+
-                        (D27.f[DIR_PP0  ])[kne ]+ (D27.f[DIR_MM0  ])[ksw ]+
-                        (D27.f[DIR_PM0  ])[kse ]+ (D27.f[DIR_MP0  ])[knw ]+
-                        (D27.f[DIR_P0P  ])[kte ]+ (D27.f[DIR_M0M  ])[kbw ]+
-                        (D27.f[DIR_P0M  ])[kbe ]+ (D27.f[DIR_M0P  ])[ktw ]+
-                        (D27.f[DIR_0PP  ])[ktn ]+ (D27.f[DIR_0MM  ])[kbs ]+
-                        (D27.f[DIR_0PM  ])[kbn ]+ (D27.f[DIR_0MP  ])[kts ]+
+         Conc[k]    =   (D27.f[DIR_P00])[ke  ]+ (D27.f[DIR_M00])[kw  ]+ 
+                        (D27.f[DIR_0P0])[kn  ]+ (D27.f[DIR_0M0])[ks  ]+
+                        (D27.f[DIR_00P])[kt  ]+ (D27.f[DIR_00M])[kb  ]+
+                        (D27.f[DIR_PP0])[kne ]+ (D27.f[DIR_MM0])[ksw ]+
+                        (D27.f[DIR_PM0])[kse ]+ (D27.f[DIR_MP0])[knw ]+
+                        (D27.f[DIR_P0P])[kte ]+ (D27.f[DIR_M0M])[kbw ]+
+                        (D27.f[DIR_P0M])[kbe ]+ (D27.f[DIR_M0P])[ktw ]+
+                        (D27.f[DIR_0PP])[ktn ]+ (D27.f[DIR_0MM])[kbs ]+
+                        (D27.f[DIR_0PM])[kbn ]+ (D27.f[DIR_0MP])[kts ]+
                         (D27.f[DIR_000])[kzero]+ 
-                        (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+
-                        (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+
-                        (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+
-                        (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM ])[kbnw];
+                        (D27.f[DIR_PPP])[ktne]+ (D27.f[DIR_MMP])[ktsw]+
+                        (D27.f[DIR_PMP])[ktse]+ (D27.f[DIR_MPP])[ktnw]+
+                        (D27.f[DIR_PPM])[kbne]+ (D27.f[DIR_MMM])[kbsw]+
+                        (D27.f[DIR_PMM])[kbse]+ (D27.f[DIR_MPM])[kbnw];
       }
    }   
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
index 4792b8846b2612383c07a97419e0473b21ebd187..f7bb09f816f45973fd4e2319a1bfa35cf9172caa 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -1,306 +1,310 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//////////////////////////////////////////////////////////////////////////
-/* Device code */
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CalcMac27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Soeren Peters
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
+#include "lbm/MacroscopicQuantities.h"
+
+#include "Kernel/Utilities/DistributionHelper.cuh"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
+
+////////////////////////////////////////////////////////////////////////////////
+__global__ void LBCalcMac27( // per-node density and incompressible velocity, written to rhoD/vxD/vyD/vzD
+    real* vxD,                            //!< [out] vxD[k] receives getIncompressibleVelocityX1 of node k
+    real* vyD,                            //!< [out] vyD[k] receives getIncompressibleVelocityX2 of node k
+    real* vzD,                            //!< [out] vzD[k] receives getIncompressibleVelocityX3 of node k
+    real* rhoD,                           //!< [out] rhoD[k] receives getDensity of node k
+    unsigned int* geoD,                   //!< [in] geoD[k] is tested with isValidFluidNode
+    unsigned int* neighborX,              //!< [in] forwarded to DistributionWrapper
+    unsigned int* neighborY,              //!< [in] forwarded to DistributionWrapper
+    unsigned int* neighborZ,              //!< [in] forwarded to DistributionWrapper
+    unsigned long long numberOfLBnodes,   //!< [in] upper bound for k; also forwarded to DistributionWrapper
+    real* distributions,                  //!< [in] forwarded to DistributionWrapper
+    bool isEvenTimestep)                  //!< [in] forwarded to DistributionWrapper
+{
+    const unsigned int tx = threadIdx.x;    // Thread index = local i index
+    const unsigned int by = blockIdx.x;     // Block index x
+    const unsigned int bz = blockIdx.y;     // Block index y
+    const unsigned int x = tx + STARTOFFX;  // Global x index
+    const unsigned int y = by + STARTOFFY;  // Global y index
+    const unsigned int z = bz + STARTOFFZ;  // Global z index
+    // Extents used to linearize (x, y, z): launch dimensions plus 2 * STARTOFF per axis
+    const unsigned nx = blockDim.x + 2 * STARTOFFX;
+    const unsigned ny = gridDim.x + 2 * STARTOFFY;
+ 
+    const unsigned int k = nx*(ny*z + y) + x; // Linear index used to access the device arrays
+ 
+    // Threads whose linear index lies beyond the node count have nothing to do
+    if(k >= numberOfLBnodes)
+        return;
+    // Nodes rejected by isValidFluidNode are left completely untouched (no reset, no write)
+    if(!isValidFluidNode(geoD[k]))
+       return;
+    // Reset all outputs to c0o1 before they are overwritten below
+    rhoD[k] = c0o1;
+    vxD[k]  = c0o1;
+    vyD[k]  = c0o1;
+    vzD[k]  = c0o1;
+    // NOTE(review): presumably gathers the distributions of node k via the neighbor arrays - confirm in DistributionHelper.cuh
+    DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, k, neighborX, neighborY, neighborZ);
+    const auto& distribution = distr_wrapper.distribution;
+    // Macroscopic quantities are delegated to the shared vf::lbm helpers
+    rhoD[k] = vf::lbm::getDensity(distribution.f);
+    vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f);
+    vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f);
+    vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f);
+}
+
 
-#include "lbm/MacroscopicQuantities.h"
 
-#include "../Kernel/Utilities/DistributionHelper.cuh"
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMac27( real* vxD,
-                                        real* vyD,
-                                        real* vzD,
-                                        real* rhoD,
-                                        unsigned int* geoD,
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat,
-                                        real* distributions,
-                                        bool isEvenTimestep)
+__global__ void LBCalcMacSP27( // per-node density, velocity and pressure summed directly from the distributions
+    real* vxD,                            //!< [out] signed sum over the first letter (P/M) of the DIR_ names
+    real* vyD,                            //!< [out] signed sum over the second letter (P/M) of the DIR_ names
+    real* vzD,                            //!< [out] signed sum over the third letter (P/M) of the DIR_ names
+    real* rhoD,                           //!< [out] plain sum of all 27 distributions
+    real* pressD,                         //!< [out] weighted distribution sum combined with rho and the velocity
+    unsigned int* geoD,                   //!< [in] only nodes with geoD == GEO_FLUID are evaluated
+    unsigned int* neighborX,              //!< [in] index lookup used for the "w" components
+    unsigned int* neighborY,              //!< [in] index lookup used for the "s" components
+    unsigned int* neighborZ,              //!< [in] index lookup used for the "b" components
+    unsigned long long numberOfLBnodes,   //!< [in] upper bound for nodeIndex; passed to getPointersToDistributions
+    real* distributions,                  //!< [in] passed to getPointersToDistributions
+    bool isEvenTimestep)                  //!< [in] passed to getPointersToDistributions
 {
-   const unsigned int tx = threadIdx.x;    // Thread index = lokaler i index
-   const unsigned int by = blockIdx.x;     // Block index x
-   const unsigned int bz = blockIdx.y;     // Block index y
-   const unsigned int x = tx + STARTOFFX;  // Globaler x-Index 
-   const unsigned int y = by + STARTOFFY;  // Globaler y-Index 
-   const unsigned int z = bz + STARTOFFZ;  // Globaler z-Index 
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+   
+    //////////////////////////////////////////////////////////////////////////
+    if(nodeIndex<numberOfLBnodes)
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+        //! timestep is based on the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+       
+        //////////////////////////////////////////////////////////////////////////
+        //index: e/n/t components use nodeIndex itself; w, s and b are reached through neighborX, neighborY and neighborZ
+        unsigned int kzero= nodeIndex;
+        unsigned int ke   = nodeIndex;
+        unsigned int kw   = neighborX[nodeIndex];
+        unsigned int kn   = nodeIndex;
+        unsigned int ks   = neighborY[nodeIndex];
+        unsigned int kt   = nodeIndex;
+        unsigned int kb   = neighborZ[nodeIndex];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = nodeIndex;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = nodeIndex;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = nodeIndex;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //////////////////////////////////////////////////////////////////////////
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        // only nodes flagged GEO_FLUID get macroscopic values; all other nodes keep the reset values above
+        if(geoD[nodeIndex] == GEO_FLUID)
+        {
+            rhoD[nodeIndex] = 
+                (dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_000])[kzero]+ 
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+            // x: directions whose name starts with P are added, those starting with M are subtracted
+            vxD[nodeIndex] =
+                (dist.f[DIR_P00])[ke  ]- (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+            // y: directions with P as second letter are added, those with M as second letter are subtracted
+            vyD[nodeIndex] =
+                (dist.f[DIR_0P0])[kn  ]- (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]-
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+            // z: directions with P as third letter are added, those with M as third letter are subtracted
+            vzD[nodeIndex] =
+                (dist.f[DIR_00P])[kt  ]- (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]-
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]-
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+            // pressure: ((6 one-axis + 2 * 12 two-axis + 3 * 8 three-axis distributions) - rho - |v|^2 * (c1o1 + c0o1 * rho)) * c1o2 + rho
+            pressD[nodeIndex] =
+                ((dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                2.f*(
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+
+                3.f*(
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])-
+                rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+c0o1*rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]; // times zero for incompressible case   
+            //caution: op is hard-coded (assumption op = 1); presumably the c1o2 factor above stands for (1.0/op-0.5)=0.5 - TODO confirm
+       }
+    }
+}
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
 
-   const unsigned nx = blockDim.x + 2 * STARTOFFX;
-   const unsigned ny = gridDim.x + 2 * STARTOFFY;
 
-   const unsigned int k = nx*(ny*z + y) + x; // Zugriff auf arrays im device
 
 
-   if(k >= size_Mat)
-      return;
 
-   if(!vf::gpu::isValidFluidNode(geoD[k]))
-      return;
 
-   rhoD[k] = c0o1;
-   vxD[k]  = c0o1;
-   vyD[k]  = c0o1;
-   vzD[k]  = c0o1;
 
-   vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ);
-   const auto& distribution = distr_wrapper.distribution;
 
-   rhoD[k] = vf::lbm::getDensity(distribution.f);
-   vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f);
-   vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f);
-   vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f);
 
-}
 
 
 
 
 
-////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMacSP27( real* vxD,
-                                          real* vyD,
-                                          real* vzD,
-                                          real* rhoD,
-                                          real* pressD,
-                                          unsigned int* geoD,
-                                          unsigned int* neighborX,
-                                          unsigned int* neighborY,
-                                          unsigned int* neighborZ,
-                                          unsigned int size_Mat,
-                                          real* DD,
-                                          bool isEvenTimestep)
-{
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
 
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-         rhoD[k]    =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_000])[kzero]+ 
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vxD[k]     =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-
-         vyD[k]     =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vzD[k]     =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-
-         pressD[k]  =  ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        2.f*(
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
-                        3.f*(
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+c0o1*rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case   
-         //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-
-      }
-   }
-}
 
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBCalcMacCompSP27(
-   real *vxD,
-   real *vyD,
-   real *vzD,
-   real *rhoD,
-   real *pressD,
-   unsigned int *geoD,
-   unsigned int *neighborX,
-   unsigned int *neighborY,
-   unsigned int *neighborZ,
-   unsigned int size_Mat,
-   real *distributions,
-   bool isEvenTimestep)
+    real *vxD,                            //!< [out] receives getCompressibleVelocityX1 for the node
+    real *vyD,                            //!< [out] receives getCompressibleVelocityX2 for the node
+    real *vzD,                            //!< [out] receives getCompressibleVelocityX3 for the node
+    real *rhoD,                           //!< [out] receives getDensity for the node
+    real *pressD,                         //!< [out] receives getPressure for the node
+    unsigned int *geoD,                   //!< [in] geoD[nodeIndex] is tested with isValidFluidNode
+    unsigned int *neighborX,              //!< [in] forwarded to DistributionWrapper
+    unsigned int *neighborY,              //!< [in] forwarded to DistributionWrapper
+    unsigned int *neighborZ,              //!< [in] forwarded to DistributionWrapper
+    unsigned long long numberOfLBnodes,   //!< [in] upper bound for nodeIndex; also forwarded to DistributionWrapper
+    real *distributions,                  //!< [in] forwarded to DistributionWrapper
+    bool isEvenTimestep)                  //!< [in] forwarded to DistributionWrapper
 {
-    const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //! - Overall: reset the five outputs, then fill them via the vf::lbm helpers for nodes accepted by isValidFluidNode.
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k >= size_Mat)
+    if(nodeIndex >= numberOfLBnodes) // thread index beyond the node count: nothing to do
         return;
 
-    pressD[k] = c0o1;
-    rhoD[k]   = c0o1;
-    vxD[k]    = c0o1;
-    vyD[k]    = c0o1;
-    vzD[k]    = c0o1;
+    pressD[nodeIndex] = c0o1; // outputs are reset first; they keep c0o1 if the node is rejected below
+    rhoD[nodeIndex]   = c0o1;
+    vxD[nodeIndex]    = c0o1;
+    vyD[nodeIndex]    = c0o1;
+    vzD[nodeIndex]    = c0o1;
 
-    if (!vf::gpu::isValidFluidNode(geoD[k]))
+    if (!isValidFluidNode(geoD[nodeIndex])) // rejected nodes keep the reset values
         return;
 
-    vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
-                                               neighborZ);
+    DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ); // NOTE(review): presumably gathers the node's distributions via the neighbor arrays - confirm
     const auto &distribution = distr_wrapper.distribution;
 
-    rhoD[k]   = vf::lbm::getDensity(distribution.f);
-    vxD[k]    = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]);
-    vyD[k]    = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]);
-    vzD[k]    = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]);
-    pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); 
+    rhoD[nodeIndex]   = vf::lbm::getDensity(distribution.f); // density first: the velocity and pressure calls below take it as input
+    vxD[nodeIndex]    = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[nodeIndex]);
+    vyD[nodeIndex]    = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[nodeIndex]);
+    vzD[nodeIndex]    = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[nodeIndex]);
+    pressD[nodeIndex] = vf::lbm::getPressure(distribution.f, rhoD[nodeIndex], vxD[nodeIndex], vyD[nodeIndex], vzD[nodeIndex]); 
 }
 
 
@@ -339,206 +343,155 @@ __global__ void LBCalcMacCompSP27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMedSP27( real* vxD,
-                                          real* vyD,
-                                          real* vzD,
-                                          real* rhoD,
-                                          real* pressD,
-                                          unsigned int* geoD,
-                                          unsigned int* neighborX,
-                                          unsigned int* neighborY,
-                                          unsigned int* neighborZ,
-                                          unsigned int size_Mat,
-                                          real* DD,
-                                          bool isEvenTimestep)
+__global__ void LBCalcMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      real PRESS = pressD[k];
-      real RHO   = rhoD[k];
-      real VX    = vxD[k];
-      real VY    = vyD[k];
-      real VZ    = vzD[k];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-         rhoD[k]    =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_000])[kzero]+ 
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+
-                        RHO;
-
-         vxD[k]     =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+
-                        VX;
-
-         vyD[k]     =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+
-                        VY;
-
-         vzD[k]     =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+
-                        VZ;
-
-         pressD[k]  =   ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        c2o1*(
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
-                        c3o1*(
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+
-                        PRESS;    
-         //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-      }
-   }
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if( nodeIndex < numberOfLBnodes )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: how the distributions are read from and written to the stored arrays depends on
+        //! whether the timestep is even or odd and follows the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+        
+        //////////////////////////////////////////////////////////////////////////
+        //index
+        unsigned int kzero= nodeIndex;
+        unsigned int ke   = nodeIndex;
+        unsigned int kw   = neighborX[nodeIndex];
+        unsigned int kn   = nodeIndex;
+        unsigned int ks   = neighborY[nodeIndex];
+        unsigned int kt   = nodeIndex;
+        unsigned int kb   = neighborZ[nodeIndex];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = nodeIndex;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = nodeIndex;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = nodeIndex;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //////////////////////////////////////////////////////////////////////////
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //////////////////////////////////////////////////////////////////////////
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        
+        if(geoD[nodeIndex] == GEO_FLUID)
+        {
+            rhoD[nodeIndex] =
+                (dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_000])[kzero]+ 
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+
+                RHO;
+            
+            vxD[nodeIndex] =
+                (dist.f[DIR_P00])[ke  ]- (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+
+                VX;
+            
+            vyD[nodeIndex] =
+                (dist.f[DIR_0P0])[kn  ]- (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]-
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+
+                VY;
+            
+            vzD[nodeIndex] =
+                (dist.f[DIR_00P])[kt  ]- (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]-
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]-
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+
+                VZ;
+            
+            pressD[nodeIndex] =
+                ((dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                c2o1*(
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+
+                c3o1*(
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])-
+                rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+
+                PRESS;    
+            //caution: op is hard-coded (assumed op = 1), i.e. the c1o2 factor in the expression above is (1.0/op-0.5)=0.5
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -563,259 +516,152 @@ __global__ void LBCalcMedSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMedCompSP27( real* vxD,
-											  real* vyD,
-											  real* vzD,
-											  real* rhoD,
-											  real* pressD,
-											  unsigned int* geoD,
-											  unsigned int* neighborX,
-											  unsigned int* neighborY,
-											  unsigned int* neighborZ,
-											  unsigned int size_Mat,
-											  real* DD,
-											  bool isEvenTimestep)
+__global__ void LBCalcMedCompSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      //unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      real PRESS = pressD[k];
-      real RHO   = rhoD[k];
-      real VX    = vxD[k];
-      real VY    = vyD[k];
-      real VZ    = vzD[k];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-		  real mfcbb = (D.f[DIR_P00])[k];//[ke   ];
-		  real mfabb = (D.f[DIR_M00])[kw];//[kw   ];  
-		  real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];
-		  real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];  
-		  real mfbbc = (D.f[DIR_00P])[k];//[kt   ];
-		  real mfbba = (D.f[DIR_00M])[kb];//[kb   ];  
-		  real mfccb = (D.f[DIR_PP0])[k];//[kne  ];  
-		  real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
-		  real mfcab = (D.f[DIR_PM0])[ks];//[kse  ]; 
-		  real mfacb = (D.f[DIR_MP0])[kw];//[knw  ]; 
-		  real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];  
-		  real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
-		  real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ]; 
-		  real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ]; 
-		  real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];  
-		  real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
-		  real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ]; 
-		  real mfbac = (D.f[DIR_0MP])[ks];//[kts  ]; 
-		  real mfbbb = (D.f[DIR_000])[k];//[kzero];
-		  real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; 
-		  real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; 
-		  real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];
-		  real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];
-		  real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];
-		  real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];
-		  real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; 
-		  real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; 
-		  ////////////////////////////////////////////////////////////////////////////////////
-		  real drho = 
-			  ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-			  (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-			  ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-
-		  real rho = c1o1 + drho;
-		  
-		  rhoD[k] = drho + RHO;
-
-		  vxD[k] = 
-			  (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-			  (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-			  (mfcbb - mfabb)) / rho) + VX;
-		  vyD[k] = 
-			  (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-			  (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-			  (mfbcb - mfbab)) / rho) + VY;
-		  vzD[k] = 
-			  (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-			  (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-			  (mfbbc - mfbba)) / rho) + VZ;
-
-		  //rhoD[k] =
-			 // (D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] +
-			 // (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] +
-			 // (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] +
-			 // (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] +
-			 // (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] +
-			 // (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] +
-			 // (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] +
-			 // (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] +
-			 // (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts] +
-			 // (D.f[DIR_000])[kzero] +
-			 // (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] +
-			 // (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] +
-			 // (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] +
-			 // (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw];// +RHO;
-
-    //     vxD[k] =  
-			 //((D.f[DIR_P00  ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-    //         (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-    //         (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-    //         (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-    //         (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-    //         (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-    //         (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-    //         (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
-    //         VX;
-
-    //     vyD[k] =  
-			 //((D.f[DIR_0P0  ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-    //         (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-    //         (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-    //         (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-    //         (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-    //         (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-    //         (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-    //         (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
-    //         VY;
-
-    //     vzD[k] =  
-			 //((D.f[DIR_00P  ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-    //         (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-    //         (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-    //         (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-    //         (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-    //         (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-    //         (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-    //         (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
-    //         VZ;
-
-         pressD[k]  =  ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        c2o1*(
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
-                        c3o1*(
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+
-                        PRESS;    
-         //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-      }
-   }
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if( nodeIndex < numberOfLBnodes )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: how the distributions are read from and written to the stored arrays depends on
+        //! whether the timestep is even or odd and follows the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+        
+        //////////////////////////////////////////////////////////////////////////
+        //index
+        //unsigned int kzero= nodeIndex; // not needed: DIR_000 is read with nodeIndex directly
+        unsigned int ke   = nodeIndex;
+        unsigned int kw   = neighborX[nodeIndex];
+        unsigned int kn   = nodeIndex;
+        unsigned int ks   = neighborY[nodeIndex];
+        unsigned int kt   = nodeIndex;
+        unsigned int kb   = neighborZ[nodeIndex];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = nodeIndex;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = nodeIndex;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = nodeIndex;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //////////////////////////////////////////////////////////////////////////
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //////////////////////////////////////////////////////////////////////////
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        
+        if(geoD[nodeIndex] == GEO_FLUID)
+        {
+            real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke   ];
+            real mfabb = (dist.f[DIR_M00])[kw];//[kw   ];  
+            real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn   ];
+            real mfbab = (dist.f[DIR_0M0])[ks];//[ks   ];  
+            real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt   ];
+            real mfbba = (dist.f[DIR_00M])[kb];//[kb   ];  
+            real mfccb = (dist.f[DIR_PP0])[nodeIndex];//[kne  ];  
+            real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw  ];
+            real mfcab = (dist.f[DIR_PM0])[ks];//[kse  ]; 
+            real mfacb = (dist.f[DIR_MP0])[kw];//[knw  ]; 
+            real mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte  ];  
+            real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw  ];
+            real mfcba = (dist.f[DIR_P0M])[kb];//[kbe  ]; 
+            real mfabc = (dist.f[DIR_M0P])[kw];//[ktw  ]; 
+            real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn  ];  
+            real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs  ];
+            real mfbca = (dist.f[DIR_0PM])[kb];//[kbn  ]; 
+            real mfbac = (dist.f[DIR_0MP])[ks];//[kts  ]; 
+            real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero];
+            real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; 
+            real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; 
+            real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ];
+            real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ];
+            real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ];
+            real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ];
+            real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; 
+            real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; 
+            ////////////////////////////////////////////////////////////////////////////////////
+            real drho = 
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+
+            real rho = c1o1 + drho;
+
+            rhoD[nodeIndex] = drho + RHO;
+
+            vxD[nodeIndex] = 
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+                (mfcbb - mfabb)) / rho) + VX;
+            vyD[nodeIndex] = 
+                (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+                (mfbcb - mfbab)) / rho) + VY;
+            vzD[nodeIndex] = 
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+                (mfbbc - mfbba)) / rho) + VZ;
+
+            pressD[nodeIndex]  =
+                ((dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                c2o1*(
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+
+                c3o1*(
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])-
+                rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+
+                PRESS;    
+            //caution: op is hard-coded (assumed op = 1), i.e. the c1o2 factor in the expression above is (1.0/op-0.5)=0.5
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -841,309 +687,191 @@ __global__ void LBCalcMedCompSP27( real* vxD,
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBCalcMedCompAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int* geoD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	real* DD,
-	real* DD_AD,
-	bool isEvenTimestep)
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    real* distributionsAD,
+    bool isEvenTimestep)
 {
-	Distributions27 D;
-	if (isEvenTimestep == true)
-	{
-		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
-	}
-	else
-	{
-		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	Distributions27 Dad;
-	if (isEvenTimestep == true)
-	{
-		Dad.f[DIR_P00]    = &DD_AD[DIR_P00   *size_Mat];
-		Dad.f[DIR_M00]    = &DD_AD[DIR_M00   *size_Mat];
-		Dad.f[DIR_0P0]    = &DD_AD[DIR_0P0   *size_Mat];
-		Dad.f[DIR_0M0]    = &DD_AD[DIR_0M0   *size_Mat];
-		Dad.f[DIR_00P]    = &DD_AD[DIR_00P   *size_Mat];
-		Dad.f[DIR_00M]    = &DD_AD[DIR_00M   *size_Mat];
-		Dad.f[DIR_PP0]   = &DD_AD[DIR_PP0  *size_Mat];
-		Dad.f[DIR_MM0]   = &DD_AD[DIR_MM0  *size_Mat];
-		Dad.f[DIR_PM0]   = &DD_AD[DIR_PM0  *size_Mat];
-		Dad.f[DIR_MP0]   = &DD_AD[DIR_MP0  *size_Mat];
-		Dad.f[DIR_P0P]   = &DD_AD[DIR_P0P  *size_Mat];
-		Dad.f[DIR_M0M]   = &DD_AD[DIR_M0M  *size_Mat];
-		Dad.f[DIR_P0M]   = &DD_AD[DIR_P0M  *size_Mat];
-		Dad.f[DIR_M0P]   = &DD_AD[DIR_M0P  *size_Mat];
-		Dad.f[DIR_0PP]   = &DD_AD[DIR_0PP  *size_Mat];
-		Dad.f[DIR_0MM]   = &DD_AD[DIR_0MM  *size_Mat];
-		Dad.f[DIR_0PM]   = &DD_AD[DIR_0PM  *size_Mat];
-		Dad.f[DIR_0MP]   = &DD_AD[DIR_0MP  *size_Mat];
-		Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat];
-		Dad.f[DIR_PPP]  = &DD_AD[DIR_PPP *size_Mat];
-		Dad.f[DIR_MMP]  = &DD_AD[DIR_MMP *size_Mat];
-		Dad.f[DIR_PMP]  = &DD_AD[DIR_PMP *size_Mat];
-		Dad.f[DIR_MPP]  = &DD_AD[DIR_MPP *size_Mat];
-		Dad.f[DIR_PPM]  = &DD_AD[DIR_PPM *size_Mat];
-		Dad.f[DIR_MMM]  = &DD_AD[DIR_MMM *size_Mat];
-		Dad.f[DIR_PMM]  = &DD_AD[DIR_PMM *size_Mat];
-		Dad.f[DIR_MPM]  = &DD_AD[DIR_MPM *size_Mat];
-	}						
-	else					
-	{						
-		Dad.f[DIR_M00]    = &DD_AD[DIR_P00   *size_Mat];
-		Dad.f[DIR_P00]    = &DD_AD[DIR_M00   *size_Mat];
-		Dad.f[DIR_0M0]    = &DD_AD[DIR_0P0   *size_Mat];
-		Dad.f[DIR_0P0]    = &DD_AD[DIR_0M0   *size_Mat];
-		Dad.f[DIR_00M]    = &DD_AD[DIR_00P   *size_Mat];
-		Dad.f[DIR_00P]    = &DD_AD[DIR_00M   *size_Mat];
-		Dad.f[DIR_MM0]   = &DD_AD[DIR_PP0  *size_Mat];
-		Dad.f[DIR_PP0]   = &DD_AD[DIR_MM0  *size_Mat];
-		Dad.f[DIR_MP0]   = &DD_AD[DIR_PM0  *size_Mat];
-		Dad.f[DIR_PM0]   = &DD_AD[DIR_MP0  *size_Mat];
-		Dad.f[DIR_M0M]   = &DD_AD[DIR_P0P  *size_Mat];
-		Dad.f[DIR_P0P]   = &DD_AD[DIR_M0M  *size_Mat];
-		Dad.f[DIR_M0P]   = &DD_AD[DIR_P0M  *size_Mat];
-		Dad.f[DIR_P0M]   = &DD_AD[DIR_M0P  *size_Mat];
-		Dad.f[DIR_0MM]   = &DD_AD[DIR_0PP  *size_Mat];
-		Dad.f[DIR_0PP]   = &DD_AD[DIR_0MM  *size_Mat];
-		Dad.f[DIR_0MP]   = &DD_AD[DIR_0PM  *size_Mat];
-		Dad.f[DIR_0PM]   = &DD_AD[DIR_0MP  *size_Mat];
-		Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat];
-		Dad.f[DIR_PPP]  = &DD_AD[DIR_MMM *size_Mat];
-		Dad.f[DIR_MMP]  = &DD_AD[DIR_PPM *size_Mat];
-		Dad.f[DIR_PMP]  = &DD_AD[DIR_MPM *size_Mat];
-		Dad.f[DIR_MPP]  = &DD_AD[DIR_PMM *size_Mat];
-		Dad.f[DIR_PPM]  = &DD_AD[DIR_MMP *size_Mat];
-		Dad.f[DIR_MMM]  = &DD_AD[DIR_PPP *size_Mat];
-		Dad.f[DIR_PMM]  = &DD_AD[DIR_MPP *size_Mat];
-		Dad.f[DIR_MPM]  = &DD_AD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k < size_Mat)
-	{
-		//////////////////////////////////////////////////////////////////////////
-		//index
-		//unsigned int kzero = k;
-		unsigned int ke = k;
-		unsigned int kw = neighborX[k];
-		unsigned int kn = k;
-		unsigned int ks = neighborY[k];
-		unsigned int kt = k;
-		unsigned int kb = neighborZ[k];
-		unsigned int ksw = neighborY[kw];
-		unsigned int kne = k;
-		unsigned int kse = ks;
-		unsigned int knw = kw;
-		unsigned int kbw = neighborZ[kw];
-		unsigned int kte = k;
-		unsigned int kbe = kb;
-		unsigned int ktw = kw;
-		unsigned int kbs = neighborZ[ks];
-		unsigned int ktn = k;
-		unsigned int kbn = kb;
-		unsigned int kts = ks;
-		unsigned int ktse = ks;
-		unsigned int kbnw = kbw;
-		unsigned int ktnw = kw;
-		unsigned int kbse = kbs;
-		unsigned int ktsw = ksw;
-		unsigned int kbne = kb;
-		unsigned int ktne = k;
-		unsigned int kbsw = neighborZ[ksw];
-		//////////////////////////////////////////////////////////////////////////
-		real CONC  = concD[k];
-		real PRESS = pressD[k];
-		real RHO   = rhoD[k];
-		real VX    = vxD[k];
-		real VY    = vyD[k];
-		real VZ    = vzD[k];
-		//////////////////////////////////////////////////////////////////////////
-		concD[k] = c0o1;
-		pressD[k] = c0o1;
-		rhoD[k] = c0o1;
-		vxD[k] = c0o1;
-		vyD[k] = c0o1;
-		vzD[k] = c0o1;
-
-		if (geoD[k] == GEO_FLUID)
-		{
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];  
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];  
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];  
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];  
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ]; 
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ]; 
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];  
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ]; 
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ]; 
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];  
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ]; 
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ]; 
-			real mfbbb = (D.f[DIR_000])[k];//[kzero];
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; 
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; 
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; 
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; 
-			////////////////////////////////////////////////////////////////////////////////////
-			real drho =
-				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) + mfbbb;
-			real rho = c1o1 + drho;
-			////////////////////////////////////////////////////////////////////////////////////
-
-			rhoD[k] = drho + RHO;
-
-			vxD[k] =
-				(((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-				(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-					(mfcbb - mfabb)) / rho) + VX;
-			
-			vyD[k] =
-				(((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-				(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-					(mfbcb - mfbab)) / rho) + VY;
-			
-			vzD[k] =
-				(((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-				(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-					(mfbbc - mfbba)) / rho) + VZ;
-
-			pressD[k] = 
-				((D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] +
-				 (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] +
-				 (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] +
-				 c2o1*(
-				 (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] +
-				 (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] +
-				 (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] +
-				 (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] +
-				 (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] +
-				 (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts]) +
-				 c3o1*(
-				 (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] +
-				 (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] +
-				 (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] +
-				 (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw]) -
-				 rhoD[k] - (vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1 + rhoD[k])) * c1o2 + rhoD[k] +
-				 PRESS;
-				 //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-			//////////////////////////////////////////////////////////////////////////
-			mfcbb = (Dad.f[DIR_P00   ])[k   ];
-			mfabb = (Dad.f[DIR_M00   ])[kw  ];
-			mfbcb = (Dad.f[DIR_0P0   ])[k   ];
-			mfbab = (Dad.f[DIR_0M0   ])[ks  ];
-			mfbbc = (Dad.f[DIR_00P   ])[k   ];
-			mfbba = (Dad.f[DIR_00M   ])[kb  ];
-			mfccb = (Dad.f[DIR_PP0  ])[k   ];
-			mfaab = (Dad.f[DIR_MM0  ])[ksw ];
-			mfcab = (Dad.f[DIR_PM0  ])[ks  ];
-			mfacb = (Dad.f[DIR_MP0  ])[kw  ];
-			mfcbc = (Dad.f[DIR_P0P  ])[k   ];
-			mfaba = (Dad.f[DIR_M0M  ])[kbw ];
-			mfcba = (Dad.f[DIR_P0M  ])[kb  ];
-			mfabc = (Dad.f[DIR_M0P  ])[kw  ];
-			mfbcc = (Dad.f[DIR_0PP  ])[k   ];
-			mfbaa = (Dad.f[DIR_0MM  ])[kbs ];
-			mfbca = (Dad.f[DIR_0PM  ])[kb  ];
-			mfbac = (Dad.f[DIR_0MP  ])[ks  ];
-			mfbbb = (Dad.f[DIR_000])[k   ];
-			mfccc = (Dad.f[DIR_PPP ])[k   ];
-			mfaac = (Dad.f[DIR_MMP ])[ksw ];
-			mfcac = (Dad.f[DIR_PMP ])[ks  ];
-			mfacc = (Dad.f[DIR_MPP ])[kw  ];
-			mfcca = (Dad.f[DIR_PPM ])[kb  ];
-			mfaaa = (Dad.f[DIR_MMM ])[kbsw];
-			mfcaa = (Dad.f[DIR_PMM ])[kbs ];
-			mfaca = (Dad.f[DIR_MPM ])[kbw ];
-			//////////////////////////////////////////////////////////////////////////
-			concD[k] = 
-				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa)   + (mfaac + mfcca))) +
-				 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba)   + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) +  mfbbb + CONC;
-		}
-	}
+    //! Adds this call's density, velocity, pressure and concentration of every fluid node to the values already stored in the output arrays; all other nodes are set to c0o1.
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //! - Threads with nodeIndex >= numberOfLBnodes do nothing.
+    if ( nodeIndex < numberOfLBnodes )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+        //! timestep is based on the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist, distAD;
+        getPointersToDistributions(dist,   distributions,   numberOfLBnodes, isEvenTimestep);
+        getPointersToDistributions(distAD, distributionsAD, numberOfLBnodes, isEvenTimestep);
+
+        //////////////////////////////////////////////////////////////////////////
+        //! - Node indices used to read each direction: kw, ks and kb are looked up in neighborX/Y/Z of this node, the remaining indices are chained lookups or aliases of these.
+        //unsigned int kzero = k;
+        unsigned int ke = nodeIndex;
+        unsigned int kw = neighborX[nodeIndex];
+        unsigned int kn = nodeIndex;
+        unsigned int ks = neighborY[nodeIndex];
+        unsigned int kt = nodeIndex;
+        unsigned int kb = neighborZ[nodeIndex];
+        unsigned int ksw = neighborY[kw];
+        unsigned int kne = nodeIndex;
+        unsigned int kse = ks;
+        unsigned int knw = kw;
+        unsigned int kbw = neighborZ[kw];
+        unsigned int kte = nodeIndex;
+        unsigned int kbe = kb;
+        unsigned int ktw = kw;
+        unsigned int kbs = neighborZ[ks];
+        unsigned int ktn = nodeIndex;
+        unsigned int kbn = kb;
+        unsigned int kts = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //! - Save the values currently stored in the output arrays (presumably the sums of earlier calls - confirm with the caller).
+        real CONC  = concD[nodeIndex];
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //! - Set all outputs to c0o1; they are only overwritten below if the node is a fluid node.
+        concD[nodeIndex]  = c0o1;
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        
+        if (geoD[nodeIndex] == GEO_FLUID)
+        {
+            real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke   ];
+            real mfabb = (dist.f[DIR_M00])[kw];//[kw   ];  
+            real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn   ];
+            real mfbab = (dist.f[DIR_0M0])[ks];//[ks   ];  
+            real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt   ];
+            real mfbba = (dist.f[DIR_00M])[kb];//[kb   ];  
+            real mfccb = (dist.f[DIR_PP0])[nodeIndex];//[kne  ];  
+            real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw  ];
+            real mfcab = (dist.f[DIR_PM0])[ks];//[kse  ]; 
+            real mfacb = (dist.f[DIR_MP0])[kw];//[knw  ]; 
+            real mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte  ];  
+            real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw  ];
+            real mfcba = (dist.f[DIR_P0M])[kb];//[kbe  ]; 
+            real mfabc = (dist.f[DIR_M0P])[kw];//[ktw  ]; 
+            real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn  ];  
+            real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs  ];
+            real mfbca = (dist.f[DIR_0PM])[kb];//[kbn  ]; 
+            real mfbac = (dist.f[DIR_0MP])[ks];//[kts  ]; 
+            real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero];
+            real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; 
+            real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; 
+            real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ];
+            real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ];
+            real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ];
+            real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ];
+            real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; 
+            real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; 
+            //! - drho is the sum of all 27 distributions read above; rho = c1o1 + drho is the divisor of the velocity terms.
+            real drho =
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) + mfbbb;
+            real rho = c1o1 + drho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //! - New stored value = value of this call + value saved above (RHO, VX, VY, VZ, PRESS, CONC).
+            rhoD[nodeIndex] = drho + RHO;
+            
+            vxD[nodeIndex] =
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+                    (mfcbb - mfabb)) / rho) + VX;
+            
+            vyD[nodeIndex] =
+                (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+                    (mfbcb - mfbab)) / rho) + VY;
+            
+            vzD[nodeIndex] =
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+                    (mfbbc - mfbba)) / rho) + VZ;
+            // NOTE(review): rhoD/vxD/vyD/vzD read in the pressure term below already contain the added RHO/VX/VY/VZ, not only this call's values - confirm this is intended.
+            pressD[nodeIndex] = 
+                ((dist.f[DIR_P00])[ke] + (dist.f[DIR_M00])[kw] +
+                 (dist.f[DIR_0P0])[kn] + (dist.f[DIR_0M0])[ks] +
+                 (dist.f[DIR_00P])[kt] + (dist.f[DIR_00M])[kb] +
+                 c2o1*(
+                 (dist.f[DIR_PP0])[kne] + (dist.f[DIR_MM0])[ksw] +
+                 (dist.f[DIR_PM0])[kse] + (dist.f[DIR_MP0])[knw] +
+                 (dist.f[DIR_P0P])[kte] + (dist.f[DIR_M0M])[kbw] +
+                 (dist.f[DIR_P0M])[kbe] + (dist.f[DIR_M0P])[ktw] +
+                 (dist.f[DIR_0PP])[ktn] + (dist.f[DIR_0MM])[kbs] +
+                 (dist.f[DIR_0PM])[kbn] + (dist.f[DIR_0MP])[kts]) +
+                 c3o1*(
+                 (dist.f[DIR_PPP])[ktne] + (dist.f[DIR_MMP])[ktsw] +
+                 (dist.f[DIR_PMP])[ktse] + (dist.f[DIR_MPP])[ktnw] +
+                 (dist.f[DIR_PPM])[kbne] + (dist.f[DIR_MMM])[kbsw] +
+                 (dist.f[DIR_PMM])[kbse] + (dist.f[DIR_MPM])[kbnw]) -
+                 rhoD[nodeIndex] - (vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1 + rhoD[nodeIndex])) * c1o2 + rhoD[nodeIndex] +
+                 PRESS;
+                 // caution: op is hard-coded, assuming op = 1, so that (1.0/op-0.5)=0.5 (presumably the c1o2 factor above - confirm)
+            //! - Reuse the mf* variables for the values of the second distribution set (distAD).
+            mfcbb = (distAD.f[DIR_P00])[nodeIndex   ];
+            mfabb = (distAD.f[DIR_M00])[kw  ];
+            mfbcb = (distAD.f[DIR_0P0])[nodeIndex   ];
+            mfbab = (distAD.f[DIR_0M0])[ks  ];
+            mfbbc = (distAD.f[DIR_00P])[nodeIndex   ];
+            mfbba = (distAD.f[DIR_00M])[kb  ];
+            mfccb = (distAD.f[DIR_PP0])[nodeIndex   ];
+            mfaab = (distAD.f[DIR_MM0])[ksw ];
+            mfcab = (distAD.f[DIR_PM0])[ks  ];
+            mfacb = (distAD.f[DIR_MP0])[kw  ];
+            mfcbc = (distAD.f[DIR_P0P])[nodeIndex   ];
+            mfaba = (distAD.f[DIR_M0M])[kbw ];
+            mfcba = (distAD.f[DIR_P0M])[kb  ];
+            mfabc = (distAD.f[DIR_M0P])[kw  ];
+            mfbcc = (distAD.f[DIR_0PP])[nodeIndex   ];
+            mfbaa = (distAD.f[DIR_0MM])[kbs ];
+            mfbca = (distAD.f[DIR_0PM])[kb  ];
+            mfbac = (distAD.f[DIR_0MP])[ks  ];
+            mfbbb = (distAD.f[DIR_000])[nodeIndex   ];
+            mfccc = (distAD.f[DIR_PPP])[nodeIndex   ];
+            mfaac = (distAD.f[DIR_MMP])[ksw ];
+            mfcac = (distAD.f[DIR_PMP])[ks  ];
+            mfacc = (distAD.f[DIR_MPP])[kw  ];
+            mfcca = (distAD.f[DIR_PPM])[kb  ];
+            mfaaa = (distAD.f[DIR_MMM])[kbsw];
+            mfcaa = (distAD.f[DIR_PMM])[kbs ];
+            mfaca = (distAD.f[DIR_MPM])[kbw ];
+            //! - concD: sum of all 27 distAD values read above plus the value saved above (CONC).
+            concD[nodeIndex] = 
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa)   + (mfaac + mfcca))) +
+                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba)   + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) +  mfbbb + CONC;
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1168,54 +896,50 @@ __global__ void LBCalcMedCompAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMacMedSP27( real* vxD,
-                                             real* vyD,
-                                             real* vzD,
-                                             real* rhoD,
-                                             real* pressD,
-                                             unsigned int* geoD,
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int tdiff,
-                                             unsigned int size_Mat,
-                                             bool isEvenTimestep)
+__global__ void LBCalcMacMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int tdiff,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      real PRESS = pressD[k];
-      real RHO   = rhoD[k];
-      real VX    = vxD[k];
-      real VY    = vyD[k];
-      real VZ    = vzD[k];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-      rhoD[k]   = c0o1;
-      vxD[k]    = c0o1;
-      vyD[k]    = c0o1;
-      vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-         rhoD[k]    =   RHO   / tdiff;
-         vxD[k]     =   VX    / tdiff;
-         vyD[k]     =   VY    / tdiff;
-         vzD[k]     =   VZ    / tdiff;
-         pressD[k]  =   PRESS / tdiff;    
-      }
-   }
+    //! Replaces the values stored in rhoD, vxD, vyD, vzD and pressD by these values divided by tdiff (fluid nodes) or by c0o1 (all other nodes); neighborX/Y/Z and isEvenTimestep are not used.
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //! - Threads with nodeIndex >= numberOfLBnodes do nothing.
+    if(nodeIndex<numberOfLBnodes)
+    {
+        //! - Save the values currently stored in the arrays (presumably sums over tdiff time steps - confirm with the caller).
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //! - Set all outputs to c0o1; they are only overwritten below if the node is a fluid node.
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        //! - Divide the saved values by tdiff; tdiff is not checked against zero here.
+        if(geoD[nodeIndex] == GEO_FLUID)
+        {
+            rhoD[nodeIndex]    =   RHO   / tdiff;
+            vxD[nodeIndex]     =   VX    / tdiff;
+            vyD[nodeIndex]     =   VY    / tdiff;
+            vzD[nodeIndex]     =   VZ    / tdiff;
+            pressD[nodeIndex]  =   PRESS / tdiff;    
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1241,34 +965,29 @@ __global__ void LBCalcMacMedSP27( real* vxD,
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBResetMedianValuesSP27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	unsigned int size_Mat,
-	bool isEvenTimestep)
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k<size_Mat)
-	{
-		//////////////////////////////////////////////////////////////////////////
-		pressD[k] = c0o1;
-		rhoD[k] = c0o1;
-		vxD[k] = c0o1;
-		vyD[k] = c0o1;
-		vzD[k] = c0o1;
-	}
+    //! Sets pressD, rhoD, vxD, vyD and vzD to c0o1 for every node index below numberOfLBnodes; isEvenTimestep is not used.
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //! - Threads with nodeIndex >= numberOfLBnodes do nothing.
+    if ( nodeIndex < numberOfLBnodes )
+    {
+        //! - Overwrite the five stored values of this node with c0o1.
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex] = c0o1;
+        vxD[nodeIndex] = c0o1;
+        vyD[nodeIndex] = c0o1;
+        vzD[nodeIndex] = c0o1;
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1294,36 +1013,30 @@ __global__ void LBResetMedianValuesSP27(
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBResetMedianValuesAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int size_Mat,
-	bool isEvenTimestep)
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k < size_Mat)
-	{
-		//////////////////////////////////////////////////////////////////////////
-		concD[k]  = c0o1;
-		pressD[k] = c0o1;
-		rhoD[k]   = c0o1;
-		vxD[k]    = c0o1;
-		vyD[k]    = c0o1;
-		vzD[k]    = c0o1;
-	}
+    //! Sets concD, pressD, rhoD, vxD, vyD and vzD to c0o1 for every node index below numberOfLBnodes; isEvenTimestep is not used.
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //! - Threads with nodeIndex >= numberOfLBnodes do nothing.
+    if (nodeIndex < numberOfLBnodes)
+    {
+        concD[nodeIndex]  = c0o1;
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1348,177 +1061,121 @@ __global__ void LBResetMedianValuesAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMeasurePoints( real* vxMP,
-												real* vyMP,
-												real* vzMP,
-												real* rhoMP,
-												unsigned int* kMP,
-												unsigned int numberOfPointskMP,
-												unsigned int MPClockCycle,
-												unsigned int t,
-												unsigned int* geoD,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												real* DD,
-												bool isEvenTimestep)
+__global__ void LBCalcMeasurePoints(
+    real* vxMP,
+    real* vyMP,
+    real* vzMP,
+    real* rhoMP,
+    unsigned int* kMP,
+    unsigned int numberOfPointskMP,
+    unsigned int MPClockCycle,
+    unsigned int t,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    bool isEvenTimestep)
 {
-	Distributions27 D;
-	if (isEvenTimestep==true)
-	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-	} 
-	else
-	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if(k<numberOfPointskMP)
-	{
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= kMP[k];//k;
-      unsigned int ke   = kzero;
-      unsigned int kw   = neighborX[kzero];
-      unsigned int kn   = kzero;
-      unsigned int ks   = neighborY[kzero];
-      unsigned int kt   = kzero;
-      unsigned int kb   = neighborZ[kzero];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = kzero;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = kzero;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = kzero;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = kzero;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-	  unsigned int kMac = k*MPClockCycle + t;
-	  //////////////////////////////////////////////////////////////////////////
-
-      if(geoD[kzero] == GEO_FLUID)
-      {
-         rhoMP[kMac]=   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_000])[kzero]+ 
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vxMP[kMac] =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-
-         vyMP[kMac] =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vzMP[kMac] =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-      }
-   }
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!   Each thread handles the kMP entry at this index.
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if( nodeIndex < numberOfPointskMP )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+        //! timestep is based on the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+        //////////////////////////////////////////////////////////////////////////
+        //! - Node indices: kzero is the node listed in kMP, the others are reached through neighborX/Y/Z.
+        unsigned int kzero= kMP[nodeIndex];
+        unsigned int ke   = kzero;
+        unsigned int kw   = neighborX[kzero];
+        unsigned int kn   = kzero;
+        unsigned int ks   = neighborY[kzero];
+        unsigned int kt   = kzero;
+        unsigned int kb   = neighborZ[kzero];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = kzero;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = kzero;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = kzero;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = kzero;
+        unsigned int kbsw = neighborZ[ksw];
+        //! - Output slot: kMac = nodeIndex * MPClockCycle + t (assumes t < MPClockCycle - confirm at call site).
+        unsigned int kMac = nodeIndex*MPClockCycle + t;
+        //////////////////////////////////////////////////////////////////////////
+        //! - Only nodes with geoD == GEO_FLUID are evaluated; otherwise nothing is written at kMac.
+        if(geoD[kzero] == GEO_FLUID)
+        {
+            rhoMP[kMac]= (dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+
+                         (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                         (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                         (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                         (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                         (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                         (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                         (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                         (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                         (dist.f[DIR_000])[kzero]+
+                         (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+
+                         (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+
+                         (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+
+                         (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+            // vxMP: entries whose DIR_ name starts with P minus those starting with M (no division by rhoMP).
+            vxMP[kMac] = (dist.f[DIR_P00])[ke  ]- (dist.f[DIR_M00])[kw  ]+
+                         (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+
+                         (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+
+                         (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+
+                         (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+
+                         (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+
+                         (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+
+                         (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+
+                         (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+            // vyMP: same pattern keyed on the second letter of the DIR_ name (P adds, M subtracts).
+            vyMP[kMac] = (dist.f[DIR_0P0])[kn  ]- (dist.f[DIR_0M0])[ks  ]+
+                         (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]-
+                         (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                         (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+
+                         (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+
+                         (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]-
+                         (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+
+                         (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]-
+                         (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+            // vzMP: same pattern keyed on the third letter of the DIR_ name (P adds, M subtracts).
+            vzMP[kMac] = (dist.f[DIR_00P])[kt  ]- (dist.f[DIR_00M])[kb  ]+
+                         (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]-
+                         (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                         (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]-
+                         (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                         (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+
+                         (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]-
+                         (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]-
+                         (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1559,40 +1216,36 @@ __global__ void LBCalcMeasurePoints( real* vxMP,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBSetOutputWallVelocitySP27( real* vxD,
-														real* vyD,
-														real* vzD,
-														real* vxWall,
-														real* vyWall,
-														real* vzWall,
-														int numberOfWallNodes, 
-														int* kWallNodes, 
-														real* rhoD,
-														real* pressD,
-														unsigned int* geoD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int size_Mat,
-														real* DD,
-														bool isEvenTimestep)
+__global__ void LBSetOutputWallVelocitySP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* vxWall,
+    real* vyWall,
+    real* vzWall,
+    int numberOfWallNodes, 
+    int* kWallNodes, 
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
-
-   if(k<numberOfWallNodes)
+   if(nodeIndex<numberOfWallNodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
-      unsigned int KWN  = kWallNodes[k];
+      unsigned int KWN  = kWallNodes[nodeIndex];
       //////////////////////////////////////////////////////////////////////////
       vxD[KWN] = 0.0;//vxWall[k];
       vyD[KWN] = 0.0;//vyWall[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
index a79588421a624cae62ec32127739efb47bb7b2ef..457623d4ee62b624248306b6b900fcff3f026286 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
@@ -15,7 +15,7 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -29,7 +29,7 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -842,7 +842,7 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
 														   unsigned int* neighborY,
 														   unsigned int* neighborZ,
 														   real* DDStart,
-														   int size_Mat,
+														   unsigned long long numberOfLBnodes,
 														   bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -856,7 +856,7 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -867,63 +867,63 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -956,33 +956,33 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
          unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real f_E     = (D.f[DIR_P00   ])[ke   ];// +  c2over27 ;
-         real f_W     = (D.f[DIR_M00   ])[kw   ];// +  c2over27 ;
-         real f_N     = (D.f[DIR_0P0   ])[kn   ];// +  c2over27 ;
-         real f_S     = (D.f[DIR_0M0   ])[ks   ];// +  c2over27 ;
-         real f_F     = (D.f[DIR_00P   ])[kt   ];// +  c2over27 ;
-         real f_B     = (D.f[DIR_00M   ])[kb   ];// +  c2over27 ;
-         real f_NE    = (D.f[DIR_PP0  ])[kne  ];// +  c1over54 ;
-         real f_SW    = (D.f[DIR_MM0  ])[ksw  ];// +  c1over54 ;
-         real f_SE    = (D.f[DIR_PM0  ])[kse  ];// +  c1over54 ;
-         real f_NW    = (D.f[DIR_MP0  ])[knw  ];// +  c1over54 ;
-         real f_Ef    = (D.f[DIR_P0P  ])[kte  ];// +  c1over54 ;
-         real f_Wb    = (D.f[DIR_M0M  ])[kbw  ];// +  c1over54 ;
-         real f_Eb    = (D.f[DIR_P0M  ])[kbe  ];// +  c1over54 ;
-         real f_Wf    = (D.f[DIR_M0P  ])[ktw  ];// +  c1over54 ;
-         real f_Nf    = (D.f[DIR_0PP  ])[ktn  ];// +  c1over54 ;
-         real f_Sb    = (D.f[DIR_0MM  ])[kbs  ];// +  c1over54 ;
-         real f_Nb    = (D.f[DIR_0PM  ])[kbn  ];// +  c1over54 ;
-         real f_Sf    = (D.f[DIR_0MP  ])[kts  ];// +  c1over54 ;
+         real f_E     = (D.f[DIR_P00])[ke   ];// +  c2over27 ;
+         real f_W     = (D.f[DIR_M00])[kw   ];// +  c2over27 ;
+         real f_N     = (D.f[DIR_0P0])[kn   ];// +  c2over27 ;
+         real f_S     = (D.f[DIR_0M0])[ks   ];// +  c2over27 ;
+         real f_F     = (D.f[DIR_00P])[kt   ];// +  c2over27 ;
+         real f_B     = (D.f[DIR_00M])[kb   ];// +  c2over27 ;
+         real f_NE    = (D.f[DIR_PP0])[kne  ];// +  c1over54 ;
+         real f_SW    = (D.f[DIR_MM0])[ksw  ];// +  c1over54 ;
+         real f_SE    = (D.f[DIR_PM0])[kse  ];// +  c1over54 ;
+         real f_NW    = (D.f[DIR_MP0])[knw  ];// +  c1over54 ;
+         real f_Ef    = (D.f[DIR_P0P])[kte  ];// +  c1over54 ;
+         real f_Wb    = (D.f[DIR_M0M])[kbw  ];// +  c1over54 ;
+         real f_Eb    = (D.f[DIR_P0M])[kbe  ];// +  c1over54 ;
+         real f_Wf    = (D.f[DIR_M0P])[ktw  ];// +  c1over54 ;
+         real f_Nf    = (D.f[DIR_0PP])[ktn  ];// +  c1over54 ;
+         real f_Sb    = (D.f[DIR_0MM])[kbs  ];// +  c1over54 ;
+         real f_Nb    = (D.f[DIR_0PM])[kbn  ];// +  c1over54 ;
+         real f_Sf    = (D.f[DIR_0MP])[kts  ];// +  c1over54 ;
          real f_R     = (D.f[DIR_000])[kzero];// +  c8over27 ;
-         real f_Nef   = (D.f[DIR_PPP ])[ktne ];// +  c1over216;
-         real f_Swf   = (D.f[DIR_MMP ])[ktsw ];// +  c1over216;
-         real f_Sef   = (D.f[DIR_PMP ])[ktse ];// +  c1over216;
-         real f_Nwf   = (D.f[DIR_MPP ])[ktnw ];// +  c1over216;
-         real f_Neb   = (D.f[DIR_PPM ])[kbne ];// +  c1over216;
-         real f_Swb   = (D.f[DIR_MMM ])[kbsw ];// +  c1over216;
-         real f_Seb   = (D.f[DIR_PMM ])[kbse ];// +  c1over216;
-         real f_Nwb   = (D.f[DIR_MPM ])[kbnw ];// +  c1over216;
+         real f_Nef   = (D.f[DIR_PPP])[ktne ];// +  c1over216;
+         real f_Swf   = (D.f[DIR_MMP])[ktsw ];// +  c1over216;
+         real f_Sef   = (D.f[DIR_PMP])[ktse ];// +  c1over216;
+         real f_Nwf   = (D.f[DIR_MPP])[ktnw ];// +  c1over216;
+         real f_Neb   = (D.f[DIR_PPM])[kbne ];// +  c1over216;
+         real f_Swb   = (D.f[DIR_MMM])[kbsw ];// +  c1over216;
+         real f_Seb   = (D.f[DIR_PMM])[kbse ];// +  c1over216;
+         real f_Nwb   = (D.f[DIR_MPM])[kbnw ];// +  c1over216;
          ////////////////////////////////////////////////////////////////////////////////////
 		 real rho=f_NW+f_W+f_SW+f_S+f_SE+f_E+f_NE+f_N+f_R+f_Nf+f_Nb+f_Sf+f_Sb+f_Ef+f_Eb+f_Wf+f_Wb+f_Nwf+f_Nwb+f_Nef+f_Neb+f_Swf+f_Swb+f_Sef+f_Seb+f_F+f_B+c1o1;// ACHTUNG ne EINS !!!!!!!!
 		 real pix=(f_NE+f_E+f_SE+f_Ef+f_Eb-f_NW-f_W-f_SW-f_Wf-f_Wb+f_Nef+f_Neb+f_Sef+f_Seb-f_Nwf-f_Nwb-f_Swf-f_Swb);
@@ -1689,7 +1689,7 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
                                                          unsigned int* neighborY,
                                                          unsigned int* neighborZ,
                                                          real* DDStart,
-                                                         int size_Mat,
+                                                         unsigned long long numberOfLBnodes,
                                                          bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -1703,7 +1703,7 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -1714,63 +1714,63 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -1803,33 +1803,33 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-         real fW    =  (D.f[DIR_M00   ])[kw ];
-         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-         real fS    =  (D.f[DIR_0M0   ])[ks ];
-         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-         real fB    =  (D.f[DIR_00M   ])[kb ];
-         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-         real fSW   =  (D.f[DIR_MM0  ])[ksw];
-         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-         real fBW   =  (D.f[DIR_M0M  ])[kbw];
-         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-         real fBS   =  (D.f[DIR_0MM  ])[kbs];
-         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fE    =  (D.f[DIR_P00])[k  ];//ke
+         real fW    =  (D.f[DIR_M00])[kw ];
+         real fN    =  (D.f[DIR_0P0])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0])[ks ];
+         real fT    =  (D.f[DIR_00P])[k  ];//kt
+         real fB    =  (D.f[DIR_00M])[kb ];
+         real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0])[ksw];
+         real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M])[kbw];
+         real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM])[kbs];
+         real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP])[ks ];//kts
          real fZERO =  (D.f[DIR_000])[k  ];//kzero
-         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-         real fBSW   = (D.f[DIR_MMM ])[kbsw];
-         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+         real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM])[kbsw];
+         real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  (fTNE+fBSW)+(fTSW+fBNE)+(fTSE+fBNW)+(fTNW+fBSE)+(fNE+fSW)+(fNW+fSE)+(fTE+fBW)+(fBE+fTW)+(fTN+fBS)+(fBN+fTS)+(fE+fW)+(fN+fS)+(fT+fB)+fZERO;
          real rho    =  rho0 + c1o1;
@@ -2321,7 +2321,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
                                                       unsigned int* neighborY,
                                                       unsigned int* neighborZ,
                                                       real* DDStart,
-                                                      int size_Mat,
+                                                      unsigned long long numberOfLBnodes,
                                                       bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -2335,7 +2335,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -2346,63 +2346,63 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -2435,33 +2435,33 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-         real fW    =  (D.f[DIR_M00   ])[kw ];
-         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-         real fS    =  (D.f[DIR_0M0   ])[ks ];
-         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-         real fB    =  (D.f[DIR_00M   ])[kb ];
-         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-         real fSW   =  (D.f[DIR_MM0  ])[ksw];
-         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-         real fBW   =  (D.f[DIR_M0M  ])[kbw];
-         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-         real fBS   =  (D.f[DIR_0MM  ])[kbs];
-         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fE    =  (D.f[DIR_P00])[k  ];//ke
+         real fW    =  (D.f[DIR_M00])[kw ];
+         real fN    =  (D.f[DIR_0P0])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0])[ks ];
+         real fT    =  (D.f[DIR_00P])[k  ];//kt
+         real fB    =  (D.f[DIR_00M])[kb ];
+         real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0])[ksw];
+         real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M])[kbw];
+         real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM])[kbs];
+         real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP])[ks ];//kts
          real fZERO =  (D.f[DIR_000])[k  ];//kzero
-         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-         real fBSW   = (D.f[DIR_MMM ])[kbsw];
-         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+         real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM])[kbsw];
+         real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW;
          real rho    =  rho0 + c1o1;
@@ -2846,7 +2846,7 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
                                                         unsigned int* neighborY,
                                                         unsigned int* neighborZ,
                                                         real* DDStart,
-                                                        int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -2860,7 +2860,7 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -2871,63 +2871,63 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -2960,33 +2960,33 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-         real fW    =  (D.f[DIR_M00   ])[kw ];
-         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-         real fS    =  (D.f[DIR_0M0   ])[ks ];
-         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-         real fB    =  (D.f[DIR_00M   ])[kb ];
-         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-         real fSW   =  (D.f[DIR_MM0  ])[ksw];
-         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-         real fBW   =  (D.f[DIR_M0M  ])[kbw];
-         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-         real fBS   =  (D.f[DIR_0MM  ])[kbs];
-         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fE    =  (D.f[DIR_P00])[k  ];//ke
+         real fW    =  (D.f[DIR_M00])[kw ];
+         real fN    =  (D.f[DIR_0P0])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0])[ks ];
+         real fT    =  (D.f[DIR_00P])[k  ];//kt
+         real fB    =  (D.f[DIR_00M])[kb ];
+         real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0])[ksw];
+         real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M])[kbw];
+         real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM])[kbs];
+         real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP])[ks ];//kts
          real fZERO =  (D.f[DIR_000])[k  ];//kzero
-         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-         real fBSW   = (D.f[DIR_MMM ])[kbsw];
-         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+         real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM])[kbsw];
+         real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW;
          real rho    =  rho0 + c1o1;
@@ -3368,7 +3368,7 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
                                                   unsigned int* neighborY,
                                                   unsigned int* neighborZ,
                                                   real* DDStart,
-                                                  int size_Mat,
+                                                  unsigned long long numberOfLBnodes,
                                                   bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -3382,7 +3382,7 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -3393,63 +3393,63 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
        Distributions27 D;
        if (EvenOrOdd==true)
        {
-          D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-          D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-          D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-          D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-          D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-          D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-          D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-          D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-          D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-          D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-          D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-          D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-          D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-          D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-          D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-          D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-          D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-          D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-          D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-          D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-          D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-          D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-          D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-          D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-          D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-          D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-          D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+          D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+          D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+          D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+          D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+          D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+          D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+          D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+          D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+          D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+          D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+          D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+          D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+          D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+          D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+          D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+          D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+          D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+          D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+          D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+          D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+          D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+          D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+          D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+          D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+          D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+          D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+          D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
        }
        else
        {
-          D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-          D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-          D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-          D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-          D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-          D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-          D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-          D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-          D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-          D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-          D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-          D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-          D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-          D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-          D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-          D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-          D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-          D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-          D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-          D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-          D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-          D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-          D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-          D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-          D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-          D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-          D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+          D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+          D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+          D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+          D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+          D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+          D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+          D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+          D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+          D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+          D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+          D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+          D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+          D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+          D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+          D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+          D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+          D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+          D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+          D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+          D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+          D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+          D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+          D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+          D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+          D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+          D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+          D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
        }
 
        ////////////////////////////////////////////////////////////////////////////////
@@ -3512,33 +3512,33 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW;
        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-       f_E    =  (D.f[DIR_P00   ])[ke]+c2o27;
-       f_W    =  (D.f[DIR_M00   ])[kw]+c2o27;
-       f_N    =  (D.f[DIR_0P0   ])[kn]+c2o27;
-       f_S    =  (D.f[DIR_0M0   ])[ks]+c2o27;
-       f_T    =  (D.f[DIR_00P   ])[kt]+c2o27;
-       f_B    =  (D.f[DIR_00M   ])[kb]+c2o27;
-       f_NE   =  (D.f[DIR_PP0  ])[kne]+c1o54;
-       f_SW   =  (D.f[DIR_MM0  ])[ksw]+c1o54;
-       f_SE   =  (D.f[DIR_PM0  ])[kse]+c1o54;
-       f_NW   =  (D.f[DIR_MP0  ])[knw]+c1o54;
-       f_TE   =  (D.f[DIR_P0P  ])[kte]+c1o54;
-       f_BW   =  (D.f[DIR_M0M  ])[kbw]+c1o54;
-       f_BE   =  (D.f[DIR_P0M  ])[kbe]+c1o54;
-       f_TW   =  (D.f[DIR_M0P  ])[ktw]+c1o54;
-       f_TN   =  (D.f[DIR_0PP  ])[ktn]+c1o54;
-       f_BS   =  (D.f[DIR_0MM  ])[kbs]+c1o54;
-       f_BN   =  (D.f[DIR_0PM  ])[kbn]+c1o54;
-       f_TS   =  (D.f[DIR_0MP  ])[kts]+c1o54;
+       f_E    =  (D.f[DIR_P00])[ke]+c2o27;
+       f_W    =  (D.f[DIR_M00])[kw]+c2o27;
+       f_N    =  (D.f[DIR_0P0])[kn]+c2o27;
+       f_S    =  (D.f[DIR_0M0])[ks]+c2o27;
+       f_T    =  (D.f[DIR_00P])[kt]+c2o27;
+       f_B    =  (D.f[DIR_00M])[kb]+c2o27;
+       f_NE   =  (D.f[DIR_PP0])[kne]+c1o54;
+       f_SW   =  (D.f[DIR_MM0])[ksw]+c1o54;
+       f_SE   =  (D.f[DIR_PM0])[kse]+c1o54;
+       f_NW   =  (D.f[DIR_MP0])[knw]+c1o54;
+       f_TE   =  (D.f[DIR_P0P])[kte]+c1o54;
+       f_BW   =  (D.f[DIR_M0M])[kbw]+c1o54;
+       f_BE   =  (D.f[DIR_P0M])[kbe]+c1o54;
+       f_TW   =  (D.f[DIR_M0P])[ktw]+c1o54;
+       f_TN   =  (D.f[DIR_0PP])[ktn]+c1o54;
+       f_BS   =  (D.f[DIR_0MM])[kbs]+c1o54;
+       f_BN   =  (D.f[DIR_0PM])[kbn]+c1o54;
+       f_TS   =  (D.f[DIR_0MP])[kts]+c1o54;
        f_ZERO =  (D.f[DIR_000])[kzero]+c8o27;
-       f_TNE   = (D.f[DIR_PPP ])[ktne]+c1o216;
-       f_TSW   = (D.f[DIR_MMP ])[ktsw]+c1o216;
-       f_TSE   = (D.f[DIR_PMP ])[ktse]+c1o216;
-       f_TNW   = (D.f[DIR_MPP ])[ktnw]+c1o216;
-       f_BNE   = (D.f[DIR_PPM ])[kbne]+c1o216;
-       f_BSW   = (D.f[DIR_MMM ])[kbsw]+c1o216;
-       f_BSE   = (D.f[DIR_PMM ])[kbse]+c1o216;
-       f_BNW   = (D.f[DIR_MPM ])[kbnw]+c1o216;
+       f_TNE   = (D.f[DIR_PPP])[ktne]+c1o216;
+       f_TSW   = (D.f[DIR_MMP])[ktsw]+c1o216;
+       f_TSE   = (D.f[DIR_PMP])[ktse]+c1o216;
+       f_TNW   = (D.f[DIR_MPP])[ktnw]+c1o216;
+       f_BNE   = (D.f[DIR_PPM])[kbne]+c1o216;
+       f_BSW   = (D.f[DIR_MMM])[kbsw]+c1o216;
+       f_BSE   = (D.f[DIR_PMM])[kbse]+c1o216;
+       f_BNW   = (D.f[DIR_MPM])[kbnw]+c1o216;
        ////////////////////////////////////////////////////////////////////////////////
 
        if( BC == GEO_FLUID || BC == GEO_VELO)
@@ -4060,7 +4060,7 @@ __global__ void LB_Kernel_Casc27(real omega,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
                                             real* DDStart,
-                                            int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -4089,63 +4089,63 @@ __global__ void LB_Kernel_Casc27(real omega,
       Distributions27 D;
       if (EvenOrOdd==true)
       {
-         D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-         D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-         D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-         D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-         D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-         D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-         D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-         D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-         D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+         D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+         D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -4208,33 +4208,33 @@ __global__ void LB_Kernel_Casc27(real omega,
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW;
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      f_E    =  (D.f[DIR_P00   ])[ke]+c2o27;
-      f_W    =  (D.f[DIR_M00   ])[kw]+c2o27;
-      f_N    =  (D.f[DIR_0P0   ])[kn]+c2o27;
-      f_S    =  (D.f[DIR_0M0   ])[ks]+c2o27;
-      f_T    =  (D.f[DIR_00P   ])[kt]+c2o27;
-      f_B    =  (D.f[DIR_00M   ])[kb]+c2o27;
-      f_NE   =  (D.f[DIR_PP0  ])[kne]+c1o54;
-      f_SW   =  (D.f[DIR_MM0  ])[ksw]+c1o54;
-      f_SE   =  (D.f[DIR_PM0  ])[kse]+c1o54;
-      f_NW   =  (D.f[DIR_MP0  ])[knw]+c1o54;
-      f_TE   =  (D.f[DIR_P0P  ])[kte]+c1o54;
-      f_BW   =  (D.f[DIR_M0M  ])[kbw]+c1o54;
-      f_BE   =  (D.f[DIR_P0M  ])[kbe]+c1o54;
-      f_TW   =  (D.f[DIR_M0P  ])[ktw]+c1o54;
-      f_TN   =  (D.f[DIR_0PP  ])[ktn]+c1o54;
-      f_BS   =  (D.f[DIR_0MM  ])[kbs]+c1o54;
-      f_BN   =  (D.f[DIR_0PM  ])[kbn]+c1o54;
-      f_TS   =  (D.f[DIR_0MP  ])[kts]+c1o54;
+      f_E    =  (D.f[DIR_P00])[ke]+c2o27;
+      f_W    =  (D.f[DIR_M00])[kw]+c2o27;
+      f_N    =  (D.f[DIR_0P0])[kn]+c2o27;
+      f_S    =  (D.f[DIR_0M0])[ks]+c2o27;
+      f_T    =  (D.f[DIR_00P])[kt]+c2o27;
+      f_B    =  (D.f[DIR_00M])[kb]+c2o27;
+      f_NE   =  (D.f[DIR_PP0])[kne]+c1o54;
+      f_SW   =  (D.f[DIR_MM0])[ksw]+c1o54;
+      f_SE   =  (D.f[DIR_PM0])[kse]+c1o54;
+      f_NW   =  (D.f[DIR_MP0])[knw]+c1o54;
+      f_TE   =  (D.f[DIR_P0P])[kte]+c1o54;
+      f_BW   =  (D.f[DIR_M0M])[kbw]+c1o54;
+      f_BE   =  (D.f[DIR_P0M])[kbe]+c1o54;
+      f_TW   =  (D.f[DIR_M0P])[ktw]+c1o54;
+      f_TN   =  (D.f[DIR_0PP])[ktn]+c1o54;
+      f_BS   =  (D.f[DIR_0MM])[kbs]+c1o54;
+      f_BN   =  (D.f[DIR_0PM])[kbn]+c1o54;
+      f_TS   =  (D.f[DIR_0MP])[kts]+c1o54;
       f_ZERO =  (D.f[DIR_000])[kzero]+c8o27;
-      f_TNE   = (D.f[DIR_PPP ])[ktne]+c1o216;
-      f_TSW   = (D.f[DIR_MMP ])[ktsw]+c1o216;
-      f_TSE   = (D.f[DIR_PMP ])[ktse]+c1o216;
-      f_TNW   = (D.f[DIR_MPP ])[ktnw]+c1o216;
-      f_BNE   = (D.f[DIR_PPM ])[kbne]+c1o216;
-      f_BSW   = (D.f[DIR_MMM ])[kbsw]+c1o216;
-      f_BSE   = (D.f[DIR_PMM ])[kbse]+c1o216;
-      f_BNW   = (D.f[DIR_MPM ])[kbnw]+c1o216;
+      f_TNE   = (D.f[DIR_PPP])[ktne]+c1o216;
+      f_TSW   = (D.f[DIR_MMP])[ktsw]+c1o216;
+      f_TSE   = (D.f[DIR_PMP])[ktse]+c1o216;
+      f_TNW   = (D.f[DIR_MPP])[ktnw]+c1o216;
+      f_BNE   = (D.f[DIR_PPM])[kbne]+c1o216;
+      f_BSW   = (D.f[DIR_MMM])[kbsw]+c1o216;
+      f_BSE   = (D.f[DIR_PMM])[kbse]+c1o216;
+      f_BNW   = (D.f[DIR_MPM])[kbnw]+c1o216;
       ////////////////////////////////////////////////////////////////////////////////
 
       if( BC == GEO_FLUID || BC == GEO_VELO)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 22192216927f91c33fafc23c54c3fae334abdd34..9fd2a6b2f5c5c10a36856852db47f989ace714ce 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -5,72 +5,65 @@
 #include <math.h>
 
 #include <Parameter/Parameter.h>
+
 #include "Parameter/CudaStreamManager.h"
-#include "PreCollisionInteractor/ActuatorLine.h"
+#include "PreCollisionInteractor/ActuatorFarm.h"
 #include "PreCollisionInteractor/Probes/Probe.h"
+#include <PreCollisionInteractor/PrecursorWriter.h>
 
 #include "Calculation/PorousMedia.h"
 
 #include "lbm/constants/NumericConstants.h"
 
-void CudaMemoryManager::cudaAllocFull(int lev)
-{
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geo      ), parameter->getParH(lev)->mem_size_int  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->k        ), parameter->getParH(lev)->mem_size_int  ));
-}
-void CudaMemoryManager::cudaFreeFull(int lev)
-{
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->geo   ));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->k     ));
-}
+
 void CudaMemoryManager::cudaCopyPrint(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho  , parameter->getParD(lev)->rho  , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure, parameter->getParD(lev)->pressure, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho         , parameter->getParD(lev)->rho         , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure    , parameter->getParD(lev)->pressure    , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 
     if(parameter->getIsBodyForce())
     {
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceX_SP   , parameter->getParD(lev)->forceX_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceY_SP   , parameter->getParD(lev)->forceY_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceZ_SP   , parameter->getParD(lev)->forceZ_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceX_SP   , parameter->getParD(lev)->forceX_SP   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceY_SP   , parameter->getParD(lev)->forceY_SP   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceZ_SP   , parameter->getParD(lev)->forceZ_SP   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
     }
 
     if(parameter->getUseTurbulentViscosity())
     {
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->turbViscosity   , parameter->getParD(lev)->turbViscosity   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->turbViscosity   , parameter->getParD(lev)->turbViscosity   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
     }
 }
 void CudaMemoryManager::cudaCopyMedianPrint(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vx_SP_Med   , parameter->getParD(lev)->vx_SP_Med   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vy_SP_Med   , parameter->getParD(lev)->vy_SP_Med   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vz_SP_Med   , parameter->getParD(lev)->vz_SP_Med   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho_SP_Med  , parameter->getParD(lev)->rho_SP_Med  , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->press_SP_Med, parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vx_SP_Med   , parameter->getParD(lev)->vx_SP_Med   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vy_SP_Med   , parameter->getParD(lev)->vy_SP_Med   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vz_SP_Med   , parameter->getParD(lev)->vz_SP_Med   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho_SP_Med  , parameter->getParD(lev)->rho_SP_Med  , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->press_SP_Med, parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 }
 void CudaMemoryManager::cudaAllocCoord(int lev)
 {
 	//Host
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateX      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateY      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateZ      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateX      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateY      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateZ      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//Device (spinning ship + uppsala)
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateX      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateY      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateZ      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateX      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateY      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateZ      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//////////////////////////////////////////////////////////////////////////
-	double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP;
+	double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes;
 	setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyCoord(int lev)
 {
 	//copy host to device
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateX,  parameter->getParH(lev)->coordinateX,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateY,  parameter->getParH(lev)->coordinateY,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateZ,  parameter->getParH(lev)->coordinateZ,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateX,  parameter->getParH(lev)->coordinateX,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateY,  parameter->getParH(lev)->coordinateY,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateZ,  parameter->getParH(lev)->coordinateZ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeCoord(int lev)
 {
@@ -81,24 +74,24 @@ void CudaMemoryManager::cudaFreeCoord(int lev)
 void CudaMemoryManager::cudaAllocBodyForce(int lev)
 {
     //Host
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceX_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceY_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceZ_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//Device
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceX_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceY_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceZ_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//////////////////////////////////////////////////////////////////////////
-	double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP;
+	double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes;
 	setMemsizeGPU(tmp, false);
 
 }
 void CudaMemoryManager::cudaCopyBodyForce(int lev)
 {
    	//copy host to device
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceX_SP,  parameter->getParH(lev)->forceX_SP,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceY_SP,  parameter->getParH(lev)->forceY_SP,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceZ_SP,  parameter->getParH(lev)->forceZ_SP,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceX_SP,  parameter->getParH(lev)->forceX_SP,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceY_SP,  parameter->getParH(lev)->forceY_SP,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceZ_SP,  parameter->getParH(lev)->forceZ_SP,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
 
 }
 void CudaMemoryManager::cudaFreeBodyForce(int lev)
@@ -111,71 +104,71 @@ void CudaMemoryManager::cudaFreeBodyForce(int lev)
 //print
 void CudaMemoryManager::cudaCopyDataToHost(int lev)
 {
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho  , parameter->getParD(lev)->rho  , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure, parameter->getParD(lev)->pressure, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho         , parameter->getParD(lev)->rho         , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure    , parameter->getParD(lev)->pressure    , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 }
 //sparse
 void CudaMemoryManager::cudaAllocSP(int lev)
 {
 	//Host
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->typeOfGridNode           ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborX    ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborY    ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborZ    ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho          ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityX           ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityY           ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityZ           ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressure        ), parameter->getParH(lev)->mem_size_real_SP));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->typeOfGridNode), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborX     ), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborY     ), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborZ     ), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho           ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityX     ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityY     ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityZ     ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressure      ), parameter->getParH(lev)->memSizeRealLBnodes    ));
 	//Device
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->typeOfGridNode               ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborX        ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborY        ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborZ        ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho              ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityX               ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityY               ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityZ               ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressure            ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributions.f[0]           ), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParD(lev)->mem_size_real_SP));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->typeOfGridNode    ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborX         ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborY         ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborZ         ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho               ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityX         ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityY         ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityZ         ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressure          ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributions.f[0]), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParD(lev)->memSizeRealLBnodes));
 	//////////////////////////////////////////////////////////////////////////
-	double tmp = 4. * (double)parameter->getParH(lev)->mem_size_int_SP + 5. * (double)parameter->getParH(lev)->mem_size_real_SP + (double)parameter->getD3Qxx() * (double)parameter->getParH(lev)->mem_size_real_SP;
+	double tmp = 4. * (double)parameter->getParH(lev)->memSizeLonglongLBnodes + 5. * (double)parameter->getParH(lev)->memSizeRealLBnodes + (double)parameter->getD3Qxx() * (double)parameter->getParH(lev)->memSizeRealLBnodes;
 	setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopySP(int lev)
 {
 	//copy host to device
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->typeOfGridNode       ,  parameter->getParH(lev)->typeOfGridNode       ,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborX,  parameter->getParH(lev)->neighborX,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborY,  parameter->getParH(lev)->neighborY,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborZ,  parameter->getParH(lev)->neighborZ,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho      ,  parameter->getParH(lev)->rho      ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityX       ,  parameter->getParH(lev)->velocityX       ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityY       ,  parameter->getParH(lev)->velocityY       ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityZ       ,  parameter->getParH(lev)->velocityZ       ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->pressure    ,  parameter->getParH(lev)->pressure    ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->typeOfGridNode, parameter->getParH(lev)->typeOfGridNode,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborX     , parameter->getParH(lev)->neighborX     ,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborY     , parameter->getParH(lev)->neighborY     ,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborZ     , parameter->getParH(lev)->neighborZ     ,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho           , parameter->getParH(lev)->rho           ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityX     , parameter->getParH(lev)->velocityX     ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityY     , parameter->getParH(lev)->velocityY     ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityZ     , parameter->getParH(lev)->velocityZ     ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->pressure      , parameter->getParH(lev)->pressure      ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeSP(int lev)
 {
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->typeOfGridNode       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityX       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityY       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityZ       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->rho      ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->pressure    ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborX));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborY));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborZ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->typeOfGridNode ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityX      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityY      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityZ      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->rho            ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->pressure       ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborX      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborY      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborZ      ));
 }
 void CudaMemoryManager::cudaAllocF3SP(int lev)
 {
     //Device
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->g6.g[0]), (unsigned long long)6*(unsigned long long)parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->g6.g[0]), (unsigned long long)6*(unsigned long long)parameter->getParD(lev)->memSizeRealLBnodes));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)6 * (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = (double)6 * (double)parameter->getParH(lev)->memSizeRealLBnodes;
     setMemsizeGPU(tmp, false);
 }
 
@@ -207,20 +200,20 @@ void CudaMemoryManager::cudaAllocVeloBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.q27[0]),  parameter->getD3Qxx()*mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.k),                  mem_size_inflow_Q_k ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vx),                 mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vy),                 mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vz),                 mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.deltaVz),            mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.RhoBC),              mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.k),                             mem_size_inflow_Q_k ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vx),                            mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vy),                            mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vz),                            mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.deltaVz),                       mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.RhoBC),                         mem_size_inflow_Q_q ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.q27[0]),      parameter->getD3Qxx()*mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.k),                      mem_size_inflow_Q_k ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vx),                     mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vy),                     mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vz),                     mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.deltaVz),                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.k),                                 mem_size_inflow_Q_k ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vx),                                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vy),                                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vz),                                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.deltaVz),                           mem_size_inflow_Q_q ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_inflow_Q_k + 4. * (double)mem_size_inflow_Q_q + (double)parameter->getD3Qxx() * (double)mem_size_inflow_Q_q;
@@ -232,13 +225,14 @@ void CudaMemoryManager::cudaCopyVeloBC(int lev)
 	unsigned int mem_size_inflow_Q_q = sizeof(real)*parameter->getParH(lev)->velocityBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.q27[0],  parameter->getParH(lev)->velocityBC.q27[0], parameter->getD3Qxx()* mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.k,       parameter->getParH(lev)->velocityBC.k,                  mem_size_inflow_Q_k,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vx,      parameter->getParH(lev)->velocityBC.Vx,                 mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vy,      parameter->getParH(lev)->velocityBC.Vy,                 mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vz,      parameter->getParH(lev)->velocityBC.Vz,                 mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.deltaVz, parameter->getParH(lev)->velocityBC.deltaVz,            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.k,       parameter->getParH(lev)->velocityBC.k,                             mem_size_inflow_Q_k,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vx,      parameter->getParH(lev)->velocityBC.Vx,                            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vy,      parameter->getParH(lev)->velocityBC.Vy,                            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vz,      parameter->getParH(lev)->velocityBC.Vz,                            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.deltaVz, parameter->getParH(lev)->velocityBC.deltaVz,                       mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
 
 }
+
 void CudaMemoryManager::cudaFreeVeloBC(int lev)
 {
 	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityBC.q27[0] ));
@@ -256,15 +250,15 @@ void CudaMemoryManager::cudaAllocOutflowBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.q27[0]), parameter->getD3Qxx()*mem_size_outflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.k),                 mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.kN),                mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.RhoBC),             mem_size_outflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.k),                            mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.kN),                           mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.RhoBC),                        mem_size_outflow_Q_q ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.q27[0]),     parameter->getD3Qxx()* mem_size_outflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.k),                      mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.kN),                     mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.RhoBC),                  mem_size_outflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.k),                                 mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.kN),                                mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.RhoBC),                             mem_size_outflow_Q_q ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_outflow_Q_q + 2. * (double)mem_size_outflow_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_outflow_Q_q;
@@ -276,9 +270,9 @@ void CudaMemoryManager::cudaCopyOutflowBC(int lev)
 	unsigned int mem_size_outflow_Q_q = sizeof(real)*parameter->getParH(lev)->outflowBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.q27[0],  parameter->getParH(lev)->outflowBC.q27[0], parameter->getD3Qxx()* mem_size_outflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.k,       parameter->getParH(lev)->outflowBC.k,                  mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.kN,      parameter->getParH(lev)->outflowBC.kN,                 mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.RhoBC,   parameter->getParH(lev)->outflowBC.RhoBC,              mem_size_outflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.k,       parameter->getParH(lev)->outflowBC.k,                             mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.kN,      parameter->getParH(lev)->outflowBC.kN,                            mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.RhoBC,   parameter->getParH(lev)->outflowBC.RhoBC,                         mem_size_outflow_Q_q,  cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeOutflowBC(int lev)
 {
@@ -297,13 +291,13 @@ void CudaMemoryManager::cudaAllocNoSlipBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.k),                 mem_size_Q_k      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.qread),             mem_size_Q_q_read ));//Geller
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.valueQ),            mem_size_Q_value  ));//Geller
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.k),                            mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.qread),                        mem_size_Q_q_read ));//Geller
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.valueQ),                       mem_size_Q_value  ));//Geller
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.q27[0]),     parameter->getD3Qxx()* mem_size_Q_q     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.k),                      mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.k),                                 mem_size_Q_k     ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q;
@@ -315,7 +309,7 @@ void CudaMemoryManager::cudaCopyNoSlipBC(int lev)
 	unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->noSlipBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.q27[0], parameter->getParH(lev)->noSlipBC.q27[0], parameter->getD3Qxx()* mem_size_Q_q,       cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.k,      parameter->getParH(lev)->noSlipBC.k,                  mem_size_Q_k,       cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.k,      parameter->getParH(lev)->noSlipBC.k,                             mem_size_Q_k,       cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeNoSlipBC(int lev)
 {
@@ -332,11 +326,11 @@ void CudaMemoryManager::cudaAllocGeomBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.k),                 mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.k),                            mem_size_Q_k      ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.q27[0]),     parameter->getD3Qxx()* mem_size_Q_q     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.k),                      mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.k),                                 mem_size_Q_k     ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q;
@@ -348,7 +342,7 @@ void CudaMemoryManager::cudaCopyGeomBC(int lev)
 	unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->geometryBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.q27[0], parameter->getParH(lev)->geometryBC.q27[0], parameter->getD3Qxx()* mem_size_Q_q,       cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.k,      parameter->getParH(lev)->geometryBC.k,                  mem_size_Q_k,       cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.k,      parameter->getParH(lev)->geometryBC.k,                             mem_size_Q_k,       cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeGeomBC(int lev)
 {
@@ -363,15 +357,15 @@ void CudaMemoryManager::cudaAllocPress(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.k),                 mem_size_Q_k      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.kN),                mem_size_Q_k      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.RhoBC),             mem_size_Q_q      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.k),                            mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.kN),                           mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.RhoBC),                        mem_size_Q_q      ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.q27[0]),     parameter->getD3Qxx()* mem_size_Q_q     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.k),                      mem_size_Q_k     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.kN),                     mem_size_Q_k     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.RhoBC),                  mem_size_Q_q     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.k),                                 mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.kN),                                mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.RhoBC),                             mem_size_Q_q     ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = 2. * (double)mem_size_Q_k + (double)mem_size_Q_q + (double)parameter->getD3Qxx()*(double)mem_size_Q_q;
@@ -524,24 +518,24 @@ void CudaMemoryManager::cudaCopyProcessNeighborXIndex(int lev, unsigned int proc
 								cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor,
-                                                     const unsigned int &memsizeFsRecv, int streamIndex)
+                                                     const unsigned int &memsizeFsRecv) // memsizeFsRecv is multiplied by getD3Qxx() to form the copy size
 {
-    if (streamIndex == -1)
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) // no SubDomainBorder stream registered: copy host -> device with cudaMemcpy
         checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].f[0],
 						 parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0],
 						 parameter->getD3Qxx() * memsizeFsRecv,
 						 cudaMemcpyHostToDevice));
     else
         checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].f[0],
-                         parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0],
-                         parameter->getD3Qxx() * memsizeFsRecv,
-                         cudaMemcpyHostToDevice,
-                         parameter->getStreamManager()->getStream(streamIndex)));
+                                         parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0],
+                                         parameter->getD3Qxx() * memsizeFsRecv,
+                                         cudaMemcpyHostToDevice,
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); // otherwise the same copy is issued with cudaMemcpyAsync on the SubDomainBorder stream
 }
 void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor,
-                                                     const unsigned int &memsizeFsSend, int streamIndex)
-{
-    if (streamIndex == -1)
+                                                     const unsigned int &memsizeFsSend)
+{  
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
     	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborX[processNeighbor].f[0],
     								parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0],
     								parameter->getD3Qxx() * memsizeFsSend,
@@ -551,7 +545,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int proce
     								     parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0],
     								     parameter->getD3Qxx() * memsizeFsSend,
     								     cudaMemcpyDeviceToHost,
-                                         parameter->getStreamManager()->getStream(streamIndex)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor)
 {
@@ -594,35 +588,33 @@ void CudaMemoryManager::cudaCopyProcessNeighborYIndex(int lev, unsigned int proc
 								parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].memsizeIndex,
 								cudaMemcpyHostToDevice));
 }
-void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv,
-                                                     int streamIndex)
+void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv) // memsizeFsRecv is multiplied by getD3Qxx() to form the copy size
 {
-    if (streamIndex == -1)
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) // no SubDomainBorder stream registered: copy host -> device with cudaMemcpy
 	    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0],
 								    parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0],
 								    parameter->getD3Qxx() * memsizeFsRecv,
 								    cudaMemcpyHostToDevice));
     else
-        checkCudaErrors(cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0],
-                        parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0],
-                        parameter->getD3Qxx() * memsizeFsRecv,
-                        cudaMemcpyHostToDevice,
-                        parameter->getStreamManager()->getStream(streamIndex)));
+        checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0],
+                                         parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0],
+                                         parameter->getD3Qxx() * memsizeFsRecv,
+                                         cudaMemcpyHostToDevice,
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); // otherwise the same copy is issued with cudaMemcpyAsync on the SubDomainBorder stream
 }
-void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend,
-                                                     int streamIndex)
+void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend) // memsizeFsSend is multiplied by getD3Qxx() to form the copy size
 {
-    if (streamIndex == -1)
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) // no SubDomainBorder stream registered: copy device -> host with cudaMemcpy
 	    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0],
 	    							parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0],
 	    							parameter->getD3Qxx() * memsizeFsSend,
 	    							cudaMemcpyDeviceToHost));
     else
-        checkCudaErrors(
-            cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0],
-                            parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0],
-                            parameter->getD3Qxx() * memsizeFsSend,
-                            cudaMemcpyDeviceToHost, parameter->getStreamManager()->getStream(streamIndex)));
+        checkCudaErrors( cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0],
+                                         parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0],
+                                         parameter->getD3Qxx() * memsizeFsSend,
+                                         cudaMemcpyDeviceToHost, 
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); // otherwise the same copy is issued with cudaMemcpyAsync on the SubDomainBorder stream
 }
 void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor)
 {
@@ -666,9 +658,9 @@ void CudaMemoryManager::cudaCopyProcessNeighborZIndex(int lev, unsigned int proc
 								cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor,
-                                                     const unsigned int &memsizeFsRecv, int streamIndex)
+                                                     const unsigned int &memsizeFsRecv)
 {
-    if (streamIndex == -1)
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
 	    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborZ[processNeighbor].f[0],
 	    							parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0],
 	    							parameter->getD3Qxx() * memsizeFsRecv,
@@ -678,12 +670,12 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int proce
 	    				                 parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0],
 	    				                 parameter->getD3Qxx() * memsizeFsRecv,
 	    				                 cudaMemcpyHostToDevice,
-                                         parameter->getStreamManager()->getStream(streamIndex)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor,
-                                                     const unsigned int &memsizeFsSend, int streamIndex)
+                                                     const unsigned int &memsizeFsSend)
 {
-    if (streamIndex == -1)
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
         checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborZ[processNeighbor].f[0],
 	        					    parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0],
 	        					    parameter->getD3Qxx() * memsizeFsSend,
@@ -693,7 +685,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int proce
 	        						     parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0],
 	        						     parameter->getD3Qxx() * memsizeFsSend,
 	        						     cudaMemcpyDeviceToHost,
-                                         parameter->getStreamManager()->getStream(streamIndex)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor)
 {
@@ -887,17 +879,17 @@ void CudaMemoryManager::cudaFreeProcessNeighborF3Z(int lev, unsigned int process
 void CudaMemoryManager::cudaAllocNeighborWSB(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborInverse    ), parameter->getParH(lev)->mem_size_int_SP    ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborInverse    ), parameter->getParH(lev)->memSizeLonglongLBnodes    )); // NOTE(review): sized by memSizeLonglongLBnodes - confirm this matches neighborInverse's element type
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborInverse        ), parameter->getParD(lev)->mem_size_int_SP    ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborInverse        ), parameter->getParD(lev)->memSizeLonglongLBnodes    )); // device buffer; its size is read from the device-side struct (getParD)
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)parameter->getParH(lev)->mem_size_int_SP;
+    double tmp = (double)parameter->getParH(lev)->memSizeLonglongLBnodes; // size of one buffer, read from the host-side struct, passed to setMemsizeGPU below
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyNeighborWSB(int lev)
 {
     //copy host to device
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborInverse,  parameter->getParH(lev)->neighborInverse,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborInverse,  parameter->getParH(lev)->neighborInverse,  parameter->getParH(lev)->memSizeLonglongLBnodes     , cudaMemcpyHostToDevice)); // copy size is read from the host-side struct (getParH)
 }
 void CudaMemoryManager::cudaFreeNeighborWSB(int lev)
 {
@@ -907,7 +899,7 @@ void CudaMemoryManager::cudaFreeNeighborWSB(int lev)
 void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev)
 {
     //Host
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->turbViscosity), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->turbViscosity), parameter->getParH(lev)->memSizeRealLBnodes));
     //Debug
     // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gSij ), parameter->getParH(lev)->mem_size_real_SP));
     // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gSDij), parameter->getParH(lev)->mem_size_real_SP));
@@ -922,7 +914,7 @@ void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev)
     // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gDzvz), parameter->getParH(lev)->mem_size_real_SP));
 
     //Device
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->turbViscosity), parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->turbViscosity), parameter->getParD(lev)->memSizeRealLBnodes));
     //Debug
     // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gSij ), parameter->getParD(lev)->mem_size_real_SP));
     // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gSDij), parameter->getParD(lev)->mem_size_real_SP));
@@ -937,13 +929,13 @@ void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev)
     // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gDzvz), parameter->getParD(lev)->mem_size_real_SP));
     // //////////////////////////////////////////////////////////////////////////
     // double tmp = (double)parameter->getParH(lev)->mem_size_real_SP * 12.0;
-    double tmp = (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = (double)parameter->getParH(lev)->memSizeRealLBnodes;
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyTurbulentViscosityHD(int lev)
 {
     //copy host to device
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->turbViscosity, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->turbViscosity, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
     //Debug
     // checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->gSij , parameter->getParH(lev)->gSij , parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
     // checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->gSDij, parameter->getParH(lev)->gSDij, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
@@ -960,7 +952,7 @@ void CudaMemoryManager::cudaCopyTurbulentViscosityHD(int lev)
 void CudaMemoryManager::cudaCopyTurbulentViscosityDH(int lev)
 {
     //copy device to host
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->turbViscosity, parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->turbViscosity, parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost));
     //Debug
     // checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->gSij , parameter->getParD(lev)->gSij , parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
     // checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->gSDij, parameter->getParD(lev)->gSDij, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
@@ -1062,29 +1054,29 @@ void CudaMemoryManager::cudaFreeTurbulenceIntensity(int lev)
 void CudaMemoryManager::cudaAllocMedianSP(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med      ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med    ), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med      ), parameter->getParH(lev)->memSizeRealLBnodes)); // five host buffers (rho, vx, vy, vz, press), each memSizeRealLBnodes in size
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med    ), parameter->getParH(lev)->memSizeRealLBnodes));
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho_SP_Med          ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vx_SP_Med           ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vy_SP_Med           ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vz_SP_Med           ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->press_SP_Med        ), parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho_SP_Med          ), parameter->getParD(lev)->memSizeRealLBnodes)); // matching device buffers; their size is read from the device-side struct (getParD)
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vx_SP_Med           ), parameter->getParD(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vy_SP_Med           ), parameter->getParD(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vz_SP_Med           ), parameter->getParD(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->press_SP_Med        ), parameter->getParD(lev)->memSizeRealLBnodes));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 5. * (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = 5. * (double)parameter->getParH(lev)->memSizeRealLBnodes; // five buffers of memSizeRealLBnodes each, passed to setMemsizeGPU below
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyMedianSP(int lev)
 {
     //copy host to device
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho_SP_Med  ,  parameter->getParH(lev)->rho_SP_Med  ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vx_SP_Med   ,  parameter->getParH(lev)->vx_SP_Med   ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vy_SP_Med   ,  parameter->getParH(lev)->vy_SP_Med   ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vz_SP_Med   ,  parameter->getParH(lev)->vz_SP_Med   ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->press_SP_Med,  parameter->getParH(lev)->press_SP_Med,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho_SP_Med  ,  parameter->getParH(lev)->rho_SP_Med  ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); // each of the five *_SP_Med buffers is copied with the host-side memSizeRealLBnodes as size
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vx_SP_Med   ,  parameter->getParH(lev)->vx_SP_Med   ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vy_SP_Med   ,  parameter->getParH(lev)->vy_SP_Med   ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vz_SP_Med   ,  parameter->getParH(lev)->vz_SP_Med   ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->press_SP_Med,  parameter->getParH(lev)->press_SP_Med,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeMedianSP(int lev)
 {
@@ -1097,11 +1089,11 @@ void CudaMemoryManager::cudaFreeMedianSP(int lev)
 void CudaMemoryManager::cudaAllocMedianOut(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out      ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out    ), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out      ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out    ), parameter->getParH(lev)->memSizeRealLBnodes));
 }
 void CudaMemoryManager::cudaFreeMedianOut(int lev)
 {
@@ -1655,6 +1647,133 @@ void CudaMemoryManager::cudaFreeWallModel(int lev, bool hasWallModelMonitor)
     }
 }
 
+
+// Precursor BC: allocate host (cudaMallocHost) and device (cudaMalloc) buffers of the precursor boundary condition on level lev.
+void CudaMemoryManager::cudaAllocPrecursorBC(int lev)
+{
+    size_t memSizeQInt = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(int);   // size_t (as in cudaAllocPrecursorData): do not truncate the byte count to uint
+    size_t memSizeQUint = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(uint);
+    size_t memSizeQReal = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(real);
+    // Host
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.k, memSizeQInt));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.q27[0], parameter->getD3Qxx()*memSizeQReal));
+
+
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0PP, memSizeQUint));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0PM, memSizeQUint));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0MP, memSizeQUint));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0MM, memSizeQUint));
+
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0PP, memSizeQReal));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0PM, memSizeQReal));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0MP, memSizeQReal));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0MM, memSizeQReal));
+    // Device
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.k, memSizeQInt));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.q27[0], parameter->getD3Qxx()*memSizeQReal));
+
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0PP, memSizeQUint));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0PM, memSizeQUint));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0MP, memSizeQUint));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0MM, memSizeQUint));
+
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0PP, memSizeQReal));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0PM, memSizeQReal));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0MP, memSizeQReal));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0MM, memSizeQReal));
+
+    double memSize = (double)(memSizeQInt+4*memSizeQUint+(4+parameter->getD3Qxx())*memSizeQReal); // device bytes: k + 4 neighbors + 4 weights + q27; double because real may be single precision
+    setMemsizeGPU(memSize, false);
+
+}
+
+// Allocate the last/current/next precursor data buffers on host and device for level lev and register the device size.
+void CudaMemoryManager::cudaAllocPrecursorData(int lev)
+{
+    size_t size = parameter->getParH(lev)->precursorBC.numberOfPrecursorNodes*sizeof(real)*parameter->getParH(lev)->precursorBC.numberOfQuantities;
+    // Host
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.last, size));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.current, size));
+    checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.next, size));
+    // Device
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.last, size));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.current, size));
+    checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.next, size));
+    setMemsizeGPU(3*size, false);
+}
+
+// Copy the precursor BC node indices (k), q27, plane neighbors and weights of level lev from host to device.
+void CudaMemoryManager::cudaCopyPrecursorBC(int lev)
+{
+    size_t memSizeQInt = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(int);   // size_t: do not truncate the byte count to uint
+    size_t memSizeQUint = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(uint);
+    size_t memSizeQReal = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(real);
+    // Host -> Device
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.k, parameter->getParH(lev)->precursorBC.k, memSizeQInt, cudaMemcpyHostToDevice));
+
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.q27[0], parameter->getParH(lev)->precursorBC.q27[0], memSizeQReal*parameter->getD3Qxx(), cudaMemcpyHostToDevice));
+
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0PP, parameter->getParH(lev)->precursorBC.planeNeighbor0PP, memSizeQUint, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0PM, parameter->getParH(lev)->precursorBC.planeNeighbor0PM, memSizeQUint, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0MP, parameter->getParH(lev)->precursorBC.planeNeighbor0MP, memSizeQUint, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0MM, parameter->getParH(lev)->precursorBC.planeNeighbor0MM, memSizeQUint, cudaMemcpyHostToDevice));
+
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0PP, parameter->getParH(lev)->precursorBC.weights0PP, memSizeQReal, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0PM, parameter->getParH(lev)->precursorBC.weights0PM, memSizeQReal, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0MP, parameter->getParH(lev)->precursorBC.weights0MP, memSizeQReal, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0MM, parameter->getParH(lev)->precursorBC.weights0MM, memSizeQReal, cudaMemcpyHostToDevice));
+}
+void CudaMemoryManager::cudaCopyPrecursorData(int lev)
+{ // Enqueue an asynchronous host-to-device copy of precursorBC.next for level lev on the Precursor stream.
+    auto prec = &parameter->getParH(lev)->precursorBC;
+    auto precStream = parameter->getStreamManager()->getStream(CudaStreamIndex::Precursor);
+    size_t memSize = prec->numberOfPrecursorNodes*sizeof(real)*prec->numberOfQuantities; // same byte count as used in cudaAllocPrecursorData
+    checkCudaErrors( cudaStreamSynchronize(precStream) ); // wait for work already queued on this stream before enqueueing the copy
+    checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, precStream) ); // not synchronized here after enqueueing
+}
+
+
+void CudaMemoryManager::cudaFreePrecursorBC(int lev)
+{
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.k));
+
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.q27[0]));
+
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighbor0PP));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighbor0PM));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighbor0MP));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighbor0MM));
+
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0PP));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0PM));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0MP));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0MM));
+
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.k));
+
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.q27[0]));
+
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0PP));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0PM));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0MP));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0MM));
+
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0PP));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0PM));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0MP));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0MM));
+}
+
+void CudaMemoryManager::cudaFreePrecursorData(int lev)
+{
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.last));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.current));
+    checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.next));
+
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.last));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.current));
+    checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.next));
+}
 //Test roundoff error
 void CudaMemoryManager::cudaAllocTestRE(int lev, unsigned int size)
 {
@@ -1913,15 +2032,15 @@ void CudaMemoryManager::cudaFreeMeasurePointsIndex(int lev)
 }
 void CudaMemoryManager::cudaAllocFsForCheckPointAndRestart(int lev)
 {
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->distributions.f[0] ),           (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->distributions.f[0] ),           (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes));
 }
 void CudaMemoryManager::cudaCopyFsForRestart(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->distributions.f[0],  parameter->getParH(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->distributions.f[0],  parameter->getParH(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaCopyFsForCheckPoint(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->distributions.f[0],  parameter->getParD(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->distributions.f[0],  parameter->getParD(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 }
 void CudaMemoryManager::cudaFreeFsForCheckPointAndRestart(int lev)
 {
@@ -2400,20 +2519,20 @@ void CudaMemoryManager::cudaFreePorousMedia(PorousMedia* pm, int lev)
 void CudaMemoryManager::cudaAllocConcentration(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->Conc), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->Conc), parameter->getParH(lev)->memSizeRealLBnodes));
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->Conc), parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->Conc), parameter->getParD(lev)->memSizeRealLBnodes));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = (double)parameter->getParH(lev)->memSizeRealLBnodes;
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyConcentrationDeviceToHost(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->Conc, parameter->getParD(lev)->Conc,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->Conc, parameter->getParD(lev)->Conc,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 }
 void CudaMemoryManager::cudaCopyConcentrationHostToDevice(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->Conc, parameter->getParH(lev)->Conc, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->Conc, parameter->getParH(lev)->Conc, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeConcentration(int lev)
 {
@@ -2425,14 +2544,14 @@ void CudaMemoryManager::cudaAllocTempFs(int lev)
     //Device
     if (parameter->getDiffMod() == 7)
     {
-        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD7.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->mem_size_real_SP));
+        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD7.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->memSizeRealLBnodes));
     }
     else if (parameter->getDiffMod() == 27)
     {
-        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD27.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->mem_size_real_SP));
+        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD27.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->memSizeRealLBnodes));
     }
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)(parameter->getDiffMod() * parameter->getParH(lev)->mem_size_real_SP);
+    double tmp = (double)(parameter->getDiffMod() * parameter->getParH(lev)->memSizeRealLBnodes);
     setMemsizeGPU(tmp, false);
 }
 //////////////////////////////////////////////////////////////////////////
@@ -2627,12 +2746,12 @@ void CudaMemoryManager::cudaFreeConcFile(int lev)
 void CudaMemoryManager::cudaAllocMedianOutAD(int lev)
 {
 	//Host
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out),   parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out),    parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out),    parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out),    parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->Conc_Med_Out),     parameter->getParH(lev)->mem_size_real_SP));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out),   parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out),    parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out),    parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out),    parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out), parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->Conc_Med_Out),     parameter->getParH(lev)->memSizeRealLBnodes));
 }
 void CudaMemoryManager::cudaFreeMedianOutAD(int lev)
 {
@@ -2886,31 +3005,31 @@ void CudaMemoryManager::cudaFreeProcessNeighborADZ(int lev, unsigned int process
 void CudaMemoryManager::cudaAlloc2ndOrderDerivitivesIsoTest(int lev)
 {
     //Host
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dxxUx), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dyyUy), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dzzUz), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dxxUx), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dyyUy), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dzzUz), parameter->getParH(lev)->memSizeRealLBnodes));
     //Device (spinning ship)
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dxxUx), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dyyUy), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dzzUz), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dxxUx), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dyyUy), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dzzUz), parameter->getParH(lev)->memSizeRealLBnodes));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes;
     setMemsizeGPU(tmp, false);
     //printf("Coord = %f MB",tmp/1000000.);
 }
 void CudaMemoryManager::cudaCopy2ndOrderDerivitivesIsoTestDH(int lev)
 {
     //copy device to host
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dxxUx, parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dyyUy, parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dzzUz, parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dxxUx, parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dyyUy, parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dzzUz, parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost));
 }
 void CudaMemoryManager::cudaCopy2ndOrderDerivitivesIsoTestHD(int lev)
 {
     //copy host to device
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->dxxUx, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->dyyUy, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->dzzUz, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->dxxUx, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->dyyUy, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->dzzUz, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
 
 }
 void CudaMemoryManager::cudaFree2ndOrderDerivitivesIsoTest(int lev)
@@ -2921,231 +3040,297 @@ void CudaMemoryManager::cudaFree2ndOrderDerivitivesIsoTest(int lev)
 
 }
 
-void CudaMemoryManager::cudaAllocFluidNodeIndices(int lev) {
-    uint mem_size_geo_fluid_nodes = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodes;
+void CudaMemoryManager::cudaAllocTaggedFluidNodeIndices(CollisionTemplate tag, int lev) {
+    uint mem_size_tagged_fluid_nodes = sizeof(uint) * parameter->getParH(lev)->numberOfTaggedFluidNodes[tag];
     // Host
-    checkCudaErrors(cudaMallocHost((void **)&(parameter->getParH(lev)->fluidNodeIndices), mem_size_geo_fluid_nodes));
+    checkCudaErrors(cudaMallocHost((void **)&(parameter->getParH(lev)->taggedFluidNodeIndices[tag]), mem_size_tagged_fluid_nodes));
     // Device
-    checkCudaErrors(cudaMalloc((void **)&(parameter->getParD(lev)->fluidNodeIndices), mem_size_geo_fluid_nodes));
+    checkCudaErrors(cudaMalloc((void **)&(parameter->getParD(lev)->taggedFluidNodeIndices[tag]), mem_size_tagged_fluid_nodes));
     //////////////////////////////////////////////////////////////////////////
-    setMemsizeGPU((double)mem_size_geo_fluid_nodes, false);
+    setMemsizeGPU((double)mem_size_tagged_fluid_nodes, false);
 }
 
-void CudaMemoryManager::cudaCopyFluidNodeIndices(int lev) {
-    uint mem_size_geo_fluid_nodes = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodes;
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fluidNodeIndices,
-                               parameter->getParH(lev)->fluidNodeIndices,
-                               mem_size_geo_fluid_nodes, cudaMemcpyHostToDevice));
+void CudaMemoryManager::cudaCopyTaggedFluidNodeIndices(CollisionTemplate tag, int lev) {
+    uint mem_size_tagged_fluid_nodes = sizeof(uint) * parameter->getParH(lev)->numberOfTaggedFluidNodes[tag];
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->taggedFluidNodeIndices[tag],
+                               parameter->getParH(lev)->taggedFluidNodeIndices[tag],
+                               mem_size_tagged_fluid_nodes, cudaMemcpyHostToDevice));
 }
 
-void CudaMemoryManager::cudaFreeFluidNodeIndices(int lev) {
-    checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->fluidNodeIndices));
+void CudaMemoryManager::cudaFreeTaggedFluidNodeIndices(CollisionTemplate tag, int lev) {
+    checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->taggedFluidNodeIndices[tag])); // NOTE(review): only the host buffer is released here; the device buffer from cudaAllocTaggedFluidNodeIndices is not freed in this function - confirm it is released elsewhere
 }
 
-void CudaMemoryManager::cudaAllocFluidNodeIndicesBorder(int lev) {
-    uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodesBorder;
-    // Host
-    checkCudaErrors(
-        cudaMallocHost((void **)&(parameter->getParH(lev)->fluidNodeIndicesBorder), mem_size_fluid_nodes_border));
-    // Device
-    checkCudaErrors(
-        cudaMalloc((void **)&(parameter->getParD(lev)->fluidNodeIndicesBorder), mem_size_fluid_nodes_border));
-    //////////////////////////////////////////////////////////////////////////
-    setMemsizeGPU((double)mem_size_fluid_nodes_border, false);
-}
+////////////////////////////////////////////////////////////////////////////////////
+//  ActuatorFarm
+///////////////////////////////////////////////////////////////////////////////
+void CudaMemoryManager::cudaAllocBladeGeometries(ActuatorFarm* actuatorFarm)
+{
+    uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines();
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeRadiiH, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->diametersH, sizeRealTurbine) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->turbinePosXH, sizeRealTurbine) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->turbinePosYH, sizeRealTurbine) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->turbinePosZH, sizeRealTurbine) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeRadiiD, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->diametersD, sizeRealTurbine) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->turbinePosXD, sizeRealTurbine) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->turbinePosYD, sizeRealTurbine) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->turbinePosZD, sizeRealTurbine) );
+    setMemsizeGPU(sizeof(real)*(actuatorFarm->getNumberOfNodesPerBlade()+4)*actuatorFarm->getNumberOfTurbines(), false);
 
-void CudaMemoryManager::cudaCopyFluidNodeIndicesBorder(int lev) {
-    uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodesBorder;
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fluidNodeIndicesBorder,
-                               parameter->getParH(lev)->fluidNodeIndicesBorder,
-                               mem_size_fluid_nodes_border, cudaMemcpyHostToDevice));
 }
+void CudaMemoryManager::cudaCopyBladeGeometriesHtoD(ActuatorFarm* actuatorFarm)
+{
+    uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines();
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeRadiiD, actuatorFarm->bladeRadiiH, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->diametersD, actuatorFarm->diametersH, sizeRealTurbine, cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosXD, actuatorFarm->turbinePosXH, sizeRealTurbine, cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosYD, actuatorFarm->turbinePosYH, sizeRealTurbine, cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosZD, actuatorFarm->turbinePosZH, sizeRealTurbine, cudaMemcpyHostToDevice) );
 
-void CudaMemoryManager::cudaFreeFluidNodeIndicesBorder(int lev) {
-    checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->fluidNodeIndicesBorder));
 }
+void CudaMemoryManager::cudaCopyBladeGeometriesDtoH(ActuatorFarm* actuatorFarm)
+{
+    uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines();
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeRadiiH, actuatorFarm->bladeRadiiD, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->diametersH, actuatorFarm->diametersD, sizeRealTurbine, cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosXH, actuatorFarm->turbinePosXD, sizeRealTurbine, cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosYH, actuatorFarm->turbinePosYD, sizeRealTurbine, cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosZH, actuatorFarm->turbinePosZD, sizeRealTurbine, cudaMemcpyDeviceToHost) );
 
-////////////////////////////////////////////////////////////////////////////////////
-//  ActuatorLine
-///////////////////////////////////////////////////////////////////////////////
+}
+void CudaMemoryManager::cudaFreeBladeGeometries(ActuatorFarm* actuatorFarm)
+{
+    checkCudaErrors( cudaFree(actuatorFarm->bladeRadiiD) );
+    checkCudaErrors( cudaFree(actuatorFarm->diametersD) );
+    checkCudaErrors( cudaFree(actuatorFarm->turbinePosXD) );
+    checkCudaErrors( cudaFree(actuatorFarm->turbinePosYD) );
+    checkCudaErrors( cudaFree(actuatorFarm->turbinePosZD) );    
+    
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeRadiiH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->diametersH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->turbinePosXH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->turbinePosYH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->turbinePosZH) );
+}
 
-void CudaMemoryManager::cudaAllocBladeRadii(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaAllocBladeOrientations(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeRadiiH, sizeof(real)*actuatorLine->getNBladeNodes()) );
+    uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines();
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->omegasH, sizeRealTurbine) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->azimuthsH, sizeRealTurbine) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->yawsH, sizeRealTurbine) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeRadiiD, sizeof(real)*actuatorLine->getNBladeNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->omegasD, sizeRealTurbine) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->azimuthsD, sizeRealTurbine) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->yawsD, sizeRealTurbine) );
 
-    setMemsizeGPU(sizeof(real)*actuatorLine->getNBladeNodes(), false);
-}
+    setMemsizeGPU(3*sizeRealTurbine, false);
 
-void CudaMemoryManager::cudaCopyBladeRadiiHtoD(ActuatorLine* actuatorLine)
-{
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeRadiiD, actuatorLine->bladeRadiiH, sizeof(real)*actuatorLine->getNBladeNodes(), cudaMemcpyHostToDevice) );
 }
+void CudaMemoryManager::cudaCopyBladeOrientationsHtoD(ActuatorFarm* actuatorFarm)
+{
+    uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines();
+    checkCudaErrors( cudaMemcpy(actuatorFarm->omegasD, actuatorFarm->omegasH, sizeRealTurbine, cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->azimuthsD, actuatorFarm->azimuthsH, sizeRealTurbine, cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->yawsD, actuatorFarm->yawsH, sizeRealTurbine, cudaMemcpyHostToDevice) );
 
-void CudaMemoryManager::cudaCopyBladeRadiiDtoH(ActuatorLine* actuatorLine)
+}
+void CudaMemoryManager::cudaCopyBladeOrientationsDtoH(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeRadiiH, actuatorLine->bladeRadiiD, sizeof(real)*actuatorLine->getNBladeNodes(), cudaMemcpyDeviceToHost) );
+    uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines();
+    checkCudaErrors( cudaMemcpy(actuatorFarm->omegasH, actuatorFarm->omegasD, sizeRealTurbine, cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->azimuthsH, actuatorFarm->azimuthsD, sizeRealTurbine, cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->yawsH, actuatorFarm->yawsD, sizeRealTurbine, cudaMemcpyDeviceToHost) );
 }
-
-void CudaMemoryManager::cudaFreeBladeRadii(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaFreeBladeOrientations(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaFree(actuatorLine->bladeRadiiD) );
+    checkCudaErrors( cudaFree(actuatorFarm->omegasD) ); // pass the device pointer itself (as in cudaFreeBladeGeometries), not the address of the member
+    checkCudaErrors( cudaFree(actuatorFarm->azimuthsD) );
+    checkCudaErrors( cudaFree(actuatorFarm->yawsD) );
 
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeRadiiH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->omegasH) ); // pass the host pointer returned by cudaMallocHost, not the address of the member
+    checkCudaErrors( cudaFreeHost(actuatorFarm->azimuthsH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->yawsH) );
 }
 
-void CudaMemoryManager::cudaAllocBladeCoords(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaAllocBladeCoords(ActuatorFarm* actuatorFarm) // Allocates host blade coordinates (X/Y/Z) plus current- and previous-timestep device arrays.
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsXH, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsYH, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsZH, sizeof(real)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsXD, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsYD, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsZD, sizeof(real)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
 
-    setMemsizeGPU(3.f*actuatorLine->getNNodes(), false);
+    setMemsizeGPU(6.*sizeof(real)*actuatorFarm->getNumberOfNodes(), false); // six device arrays, counted in bytes like the other allocators
 }
 
-void CudaMemoryManager::cudaCopyBladeCoordsHtoD(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopyBladeCoordsHtoD(ActuatorFarm* actuatorFarm) // Uploads the host blade coordinates into the *CurrentTimestep device arrays; the *PreviousTimestep arrays are not written here.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsXD, actuatorLine->bladeCoordsXH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsYD, actuatorLine->bladeCoordsYH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsZD, actuatorLine->bladeCoordsZH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXDCurrentTimestep, actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYDCurrentTimestep, actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZDCurrentTimestep, actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
 }
 
-void CudaMemoryManager::cudaCopyBladeCoordsDtoH(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopyBladeCoordsDtoH(ActuatorFarm* actuatorFarm) // Downloads the *CurrentTimestep device blade coordinates into the host arrays.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsXH, actuatorLine->bladeCoordsXD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsYH, actuatorLine->bladeCoordsYD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsZH, actuatorLine->bladeCoordsZD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXH, actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYH, actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZH, actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
 }
 
-void CudaMemoryManager::cudaFreeBladeCoords(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaFreeBladeCoords(ActuatorFarm* actuatorFarm) // Frees the current- and previous-timestep device coordinate arrays and the host coordinate arrays.
 {
-    checkCudaErrors( cudaFree(actuatorLine->bladeCoordsXD) );
-    checkCudaErrors( cudaFree(actuatorLine->bladeCoordsYD) );
-    checkCudaErrors( cudaFree(actuatorLine->bladeCoordsZD) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsXDCurrentTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsYDCurrentTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsZDCurrentTimestep) );
 
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsXH) );
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsYH) );
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsZH) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsXDPreviousTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsYDPreviousTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsZDPreviousTimestep) );
+
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeCoordsXH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeCoordsYH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeCoordsZH) );
 }
 
-void CudaMemoryManager::cudaAllocBladeIndices(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaAllocBladeIndices(ActuatorFarm* actuatorFarm) // Allocates host and device arrays of getNumberOfNodes() uint blade indices.
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeIndicesH, sizeof(uint)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeIndicesD, sizeof(uint)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeIndicesD, sizeof(uint)*actuatorFarm->getNumberOfNodes()) );
 
-    setMemsizeGPU(sizeof(uint)*actuatorLine->getNNodes(), false);
+    setMemsizeGPU(sizeof(uint)*actuatorFarm->getNumberOfNodes(), false); // only the device array is added to the GPU memory count
 }
 
-void CudaMemoryManager::cudaCopyBladeIndicesHtoD(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopyBladeIndicesHtoD(ActuatorFarm* actuatorFarm) // Uploads the host blade index array to the device.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeIndicesD, actuatorLine->bladeIndicesH, sizeof(uint)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeIndicesD, actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
 }
 
-void CudaMemoryManager::cudaFreeBladeIndices(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaFreeBladeIndices(ActuatorFarm* actuatorFarm) // Frees the device and host blade index arrays.
 {
-    checkCudaErrors( cudaFree(actuatorLine->bladeIndicesD) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeIndicesD) );
 
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeIndicesH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeIndicesH) );
 }
 
-void CudaMemoryManager::cudaAllocBladeVelocities(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaAllocBladeVelocities(ActuatorFarm* actuatorFarm) // Allocates host blade velocities (X/Y/Z) plus current- and previous-timestep device arrays.
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesXH, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesYH, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesZH, sizeof(real)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesXD, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesYD, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesZD, sizeof(real)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
 
-    setMemsizeGPU(3.*sizeof(real)*actuatorLine->getNNodes(), false);
+    setMemsizeGPU(6.*sizeof(real)*actuatorFarm->getNumberOfNodes(), false); // six device arrays: X/Y/Z for the current and the previous timestep
 }
 
-void CudaMemoryManager::cudaCopyBladeVelocitiesHtoD(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopyBladeVelocitiesHtoD(ActuatorFarm* actuatorFarm) // Uploads the host blade velocities into the *CurrentTimestep device arrays; the *PreviousTimestep arrays are not written here.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesXD, actuatorLine->bladeVelocitiesXH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesYD, actuatorLine->bladeVelocitiesYH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesZD, actuatorLine->bladeVelocitiesZH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXDCurrentTimestep, actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYDCurrentTimestep, actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZDCurrentTimestep, actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
 }
 
-void CudaMemoryManager::cudaCopyBladeVelocitiesDtoH(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopyBladeVelocitiesDtoH(ActuatorFarm* actuatorFarm) // Downloads the *CurrentTimestep device blade velocities into the host arrays.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesXH, actuatorLine->bladeVelocitiesXD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesYH, actuatorLine->bladeVelocitiesYD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesZH, actuatorLine->bladeVelocitiesZD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXH, actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYH, actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZH, actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
 }
 
-void CudaMemoryManager::cudaFreeBladeVelocities(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaFreeBladeVelocities(ActuatorFarm* actuatorFarm) // Frees the current- and previous-timestep device velocity arrays and the host velocity arrays.
 {
-    checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesXD) );
-    checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesYD) );
-    checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesZD) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesXDCurrentTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesYDCurrentTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesZDCurrentTimestep) );    
+    
+    checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesXDPreviousTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesYDPreviousTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesZDPreviousTimestep) );
 
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesXH) );
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesYH) );
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesZH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeVelocitiesXH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeVelocitiesYH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeVelocitiesZH) );
 }
 
-void CudaMemoryManager::cudaAllocBladeForces(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaAllocBladeForces(ActuatorFarm* actuatorFarm) // Allocates host blade forces (X/Y/Z) plus current- and previous-timestep device arrays.
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesXH, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesYH, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesZH, sizeof(real)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesXD, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesYD, sizeof(real)*actuatorLine->getNNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesZD, sizeof(real)*actuatorLine->getNNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
 
-    setMemsizeGPU(3.*sizeof(real)*actuatorLine->getNNodes(), false);
+    setMemsizeGPU(6.*sizeof(real)*actuatorFarm->getNumberOfNodes(), false); // six device arrays: X/Y/Z for the current and the previous timestep
 }
 
-void CudaMemoryManager::cudaCopyBladeForcesHtoD(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopyBladeForcesHtoD(ActuatorFarm* actuatorFarm) // Uploads the host blade forces into the *CurrentTimestep device arrays; the *PreviousTimestep arrays are not written here.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesXD, actuatorLine->bladeForcesXH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesYD, actuatorLine->bladeForcesYH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesZD, actuatorLine->bladeForcesZH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXDCurrentTimestep, actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYDCurrentTimestep, actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZDCurrentTimestep, actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
 }
 
-void CudaMemoryManager::cudaCopyBladeForcesDtoH(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopyBladeForcesDtoH(ActuatorFarm* actuatorFarm) // Downloads the *CurrentTimestep device blade forces into the host arrays.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesXH, actuatorLine->bladeForcesXD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesYH, actuatorLine->bladeForcesYD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesZH, actuatorLine->bladeForcesZD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXH, actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYH, actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZH, actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
 }
 
-void CudaMemoryManager::cudaFreeBladeForces(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaFreeBladeForces(ActuatorFarm* actuatorFarm) // Frees the current- and previous-timestep device force arrays and the host force arrays.
 {
-    checkCudaErrors( cudaFree(actuatorLine->bladeForcesXD) );
-    checkCudaErrors( cudaFree(actuatorLine->bladeForcesYD) );
-    checkCudaErrors( cudaFree(actuatorLine->bladeForcesZD) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeForcesXDCurrentTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeForcesYDCurrentTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeForcesZDCurrentTimestep) );
 
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesXH) );
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesYH) );
-    checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesZH) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeForcesXDPreviousTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeForcesYDPreviousTimestep) );
+    checkCudaErrors( cudaFree(actuatorFarm->bladeForcesZDPreviousTimestep) );
+
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeForcesXH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeForcesYH) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->bladeForcesZH) );
 }
 
-void CudaMemoryManager::cudaAllocSphereIndices(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaAllocSphereIndices(ActuatorFarm* actuatorFarm) // Allocates host and device arrays of getNumberOfIndices() int bounding-sphere indices.
 {
-    checkCudaErrors( cudaMallocHost((void**) &(actuatorLine->boundingSphereIndicesH), sizeof(int)*actuatorLine->getNIndices()));
-    checkCudaErrors( cudaMalloc((void**) &(actuatorLine->boundingSphereIndicesD), sizeof(int)*actuatorLine->getNIndices()));
-    setMemsizeGPU(sizeof(int)*actuatorLine->getNIndices(), false);
+    checkCudaErrors( cudaMallocHost((void**) &(actuatorFarm->boundingSphereIndicesH), sizeof(int)*actuatorFarm->getNumberOfIndices()));
+    checkCudaErrors( cudaMalloc((void**) &(actuatorFarm->boundingSphereIndicesD), sizeof(int)*actuatorFarm->getNumberOfIndices()));
+    setMemsizeGPU(sizeof(int)*actuatorFarm->getNumberOfIndices(), false); // only the device array is added to the GPU memory count
 }
 
-void CudaMemoryManager::cudaCopySphereIndicesHtoD(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaCopySphereIndicesHtoD(ActuatorFarm* actuatorFarm) // Uploads the host bounding-sphere index array to the device.
 {
-    checkCudaErrors( cudaMemcpy(actuatorLine->boundingSphereIndicesD, actuatorLine->boundingSphereIndicesH, sizeof(int)*actuatorLine->getNIndices(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->boundingSphereIndicesD, actuatorFarm->boundingSphereIndicesH, sizeof(int)*actuatorFarm->getNumberOfIndices(), cudaMemcpyHostToDevice) );
 }
 
-void CudaMemoryManager::cudaFreeSphereIndices(ActuatorLine* actuatorLine)
+void CudaMemoryManager::cudaFreeSphereIndices(ActuatorFarm* actuatorFarm) // Frees the host and device bounding-sphere index arrays.
 {
-    checkCudaErrors( cudaFreeHost(actuatorLine->boundingSphereIndicesH) );
-    checkCudaErrors( cudaFree(actuatorLine->boundingSphereIndicesD) );
+    checkCudaErrors( cudaFreeHost(actuatorFarm->boundingSphereIndicesH) );
+    checkCudaErrors( cudaFree(actuatorFarm->boundingSphereIndicesD) );
 }
 
 ////////////////////////////////////////////////////////////////////////////////////
@@ -3231,8 +3416,11 @@ void CudaMemoryManager::cudaCopyProbeQuantityArrayHtoD(Probe* probe, int level)
 }
 void CudaMemoryManager::cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level)
 {
-    checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesArrayH, probe->getProbeStruct(level)->quantitiesArrayD, probe->getProbeStruct(level)->nArrays*sizeof(real)*probe->getProbeStruct(level)->nPoints, cudaMemcpyDeviceToHost) );
+    auto probeStruct = probe->getProbeStruct(level); // look the per-level struct up once instead of on every member access
+
+    checkCudaErrors( cudaMemcpy(probeStruct->quantitiesArrayH, probeStruct->quantitiesArrayD, probeStruct->nArrays*sizeof(real)*probeStruct->nPoints, cudaMemcpyDeviceToHost) ); // copies nArrays*nPoints reals from device to host
 }
+
 void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level)
 {
     checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesArrayH) );
@@ -3262,6 +3450,7 @@ void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int
     checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(Statistic::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) );
     checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(Statistic::LAST)*sizeof(int), cudaMemcpyDeviceToHost) );
 }
+
 void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level)
 {
     checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesH) );
@@ -3270,23 +3459,51 @@ void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int leve
     checkCudaErrors( cudaFree    (probe->getProbeStruct(level)->arrayOffsetsD) );
 }
 
+void CudaMemoryManager::cudaAllocPrecursorWriter(PrecursorWriter* writer, int level) // Creates the CUDA stream and allocates the index and data buffers of the writer's precursor struct for one level.
+{
+    auto prec =  writer->getPrecursorStruct(level);
+    size_t indSize = prec->numberOfPointsInBC*sizeof(uint); // bytes of the index array: one uint per point
 
+    checkCudaErrors( cudaStreamCreate(&prec->stream) ); // stream later used by cudaCopyPrecursorWriterOutputVariablesDtoH
 
+    checkCudaErrors( cudaMallocHost((void**) &prec->indicesH, indSize));
+    checkCudaErrors( cudaMalloc((void**) &prec->indicesD, indSize));
 
+    size_t dataSize  = prec->numberOfPointsInBC*sizeof(real)*prec->numberOfQuantities; // bytes of one timestep: one real per point and quantity
+    size_t dataSizeH = dataSize * prec->numberOfTimestepsPerFile; // host buffers hold numberOfTimestepsPerFile timesteps
+    
+    checkCudaErrors( cudaMallocHost((void**) &prec->dataH, dataSizeH));
+    checkCudaErrors( cudaMallocHost((void**) &prec->bufferH, dataSizeH));
+    checkCudaErrors( cudaMalloc((void**) &prec->dataD, dataSize));
+    checkCudaErrors( cudaMalloc((void**) &prec->bufferD, dataSize));
 
+    setMemsizeGPU(indSize+2*dataSize, false); // device allocations only: indicesD + dataD + bufferD
+}
 
+void CudaMemoryManager::cudaCopyPrecursorWriterIndicesHtoD(PrecursorWriter* writer, int level) // Uploads the precursor struct's host index array (numberOfPointsInBC uints) to the device.
+{
+    checkCudaErrors( cudaMemcpy(writer->getPrecursorStruct(level)->indicesD, writer->getPrecursorStruct(level)->indicesH, writer->getPrecursorStruct(level)->numberOfPointsInBC*sizeof(uint), cudaMemcpyHostToDevice) );
+}
 
+void CudaMemoryManager::cudaCopyPrecursorWriterOutputVariablesDtoH(PrecursorWriter* writer, int level) // Queues an asynchronous device-to-host copy of one timestep from bufferD into bufferH.
+{
+    auto prec =  writer->getPrecursorStruct(level);
+    int sizeTimestep = prec->numberOfPointsInBC*prec->numberOfQuantities; // number of reals in one timestep
 
+    checkCudaErrors( cudaStreamSynchronize(prec->stream) ); // wait for work already queued on this stream before issuing the next copy
+    checkCudaErrors( cudaMemcpyAsync( &prec->bufferH[prec->numberOfTimestepsBuffered*sizeTimestep], prec->bufferD, sizeof(real)*sizeTimestep, cudaMemcpyDeviceToHost, prec->stream)); // destination offset is numberOfTimestepsBuffered timesteps into bufferH; completion is not awaited in this function
+}
 
+void CudaMemoryManager::cudaFreePrecursorWriter(PrecursorWriter* writer, int level) // Frees the host and device index, data and buffer arrays of the writer's precursor struct for one level.
+{
+    checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->indicesH));
+    checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->indicesD));
 
-
-
-
-
-
-
-
-
+    checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->dataH));
+    checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->bufferH));
+    checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->dataD));
+    checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->bufferD));
+} // NOTE(review): the stream created by cudaStreamCreate in cudaAllocPrecursorWriter is not destroyed here - confirm it is released elsewhere (cudaStreamDestroy)
 
 
 CudaMemoryManager::CudaMemoryManager(std::shared_ptr<Parameter> parameter) : parameter(parameter)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index d410340d2de7797cf23a781a64d11f592d62a6fb..e2f2e8658b6ef7a9453546454dd8e1f643574e17 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -18,8 +18,10 @@
 
 class Parameter;
 class PorousMedia;
-class ActuatorLine;
+class ActuatorFarm;
 class Probe;
+class VelocitySetter;
+class PrecursorWriter;
 
 class VIRTUALFLUIDS_GPU_EXPORT CudaMemoryManager
 {
@@ -30,8 +32,8 @@ public:
     void setMemsizeGPU(double admem, bool reset);
     double getMemsizeGPU();
 
-    void cudaAllocFull(int lev);
-    void cudaFreeFull(int lev);
+    //void cudaAllocFull(int lev); //DEPRECATED: related to full matrix
+    //void cudaFreeFull(int lev);  //DEPRECATED: related to full matrix
 
     void cudaCopyPrint(int lev);
     void cudaCopyMedianPrint(int lev);
@@ -92,26 +94,20 @@ public:
     //////////////////////////////////////////////////////////////////////////
     //3D domain decomposition
     virtual void cudaAllocProcessNeighborX(int lev, unsigned int processNeighbor);
-    void cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv,
-                                      int streamIndex);
-    void cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend,
-                                      int streamIndex);
+    void cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv);
+    void cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend);
     virtual void cudaCopyProcessNeighborXIndex(int lev, unsigned int processNeighbor);
     void cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor);
     //
     virtual void cudaAllocProcessNeighborY(int lev, unsigned int processNeighbor);
-    void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv,
-                                      int streamIndex);
-    void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend,
-                                      int streamIndex);
+    void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv);
+    void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend);
     virtual void cudaCopyProcessNeighborYIndex(int lev, unsigned int processNeighbor);
     void cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor);
     //
     virtual void cudaAllocProcessNeighborZ(int lev, unsigned int processNeighbor);
-    void cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv,
-                                      int streamIndex);
-    void cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend,
-                                      int streamIndex);
+    void cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv);
+    void cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend);
     virtual void cudaCopyProcessNeighborZIndex(int lev, unsigned int processNeighbor);
     void cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor);
 
@@ -183,6 +179,13 @@ public:
     void cudaCopyStressBC(int lev);
     void cudaFreeStressBC(int lev);
 
+    void cudaAllocPrecursorBC(int lev);
+    void cudaAllocPrecursorData(int lev);
+    void cudaCopyPrecursorBC(int lev);
+    void cudaCopyPrecursorData(int lev);
+    void cudaFreePrecursorBC(int lev);
+    void cudaFreePrecursorData(int lev);
+
     void cudaAllocWallModel(int lev, bool hasWallModelMonitor);
     void cudaCopyWallModel(int lev,  bool hasWallModelMonitor);
     void cudaFreeWallModel(int lev,  bool hasWallModelMonitor);
@@ -346,42 +349,44 @@ public:
     void cudaCopyProcessNeighborADZIndex(int lev, unsigned int processNeighbor);
     void cudaFreeProcessNeighborADZ(int lev, unsigned int processNeighbor);
 
-    void cudaAllocFluidNodeIndices(int lev);
-    void cudaCopyFluidNodeIndices(int lev);
-    void cudaFreeFluidNodeIndices(int lev);
-    void cudaAllocFluidNodeIndicesBorder(int lev);
-    void cudaCopyFluidNodeIndicesBorder(int lev);
-    void cudaFreeFluidNodeIndicesBorder(int lev);
-
-    // Actuator Line
-    void cudaAllocBladeRadii(ActuatorLine* actuatorLine);
-    void cudaCopyBladeRadiiHtoD(ActuatorLine* actuatorLine);
-    void cudaCopyBladeRadiiDtoH(ActuatorLine* actuatorLine);
-    void cudaFreeBladeRadii(ActuatorLine* actuatorLine);
-
-    void cudaAllocBladeCoords(ActuatorLine* actuatorLine);
-    void cudaCopyBladeCoordsHtoD(ActuatorLine* actuatorLine);
-    void cudaCopyBladeCoordsDtoH(ActuatorLine* actuatorLine);
-    void cudaFreeBladeCoords(ActuatorLine* actuatorLine);
-
-    void cudaAllocBladeIndices(ActuatorLine* actuatorLine);
-    void cudaCopyBladeIndicesHtoD(ActuatorLine* actuatorLine);
-    void cudaFreeBladeIndices(ActuatorLine* actuatorLine);
-
-    void cudaAllocBladeVelocities(ActuatorLine* actuatorLine);
-    void cudaCopyBladeVelocitiesHtoD(ActuatorLine* actuatorLine);
-    void cudaCopyBladeVelocitiesDtoH(ActuatorLine* actuatorLine);
-    void cudaFreeBladeVelocities(ActuatorLine* actuatorLine);
-
-    void cudaAllocBladeForces(ActuatorLine* actuatorLine);
-    void cudaCopyBladeForcesHtoD(ActuatorLine* actuatorLine);
-    void cudaCopyBladeForcesDtoH(ActuatorLine* actuatorLine);
-    void cudaFreeBladeForces(ActuatorLine* actuatorLine);
-
-    void cudaAllocSphereIndices(ActuatorLine* actuatorLine);
-    void cudaCopySphereIndicesHtoD(ActuatorLine* actuatorLine);
-    void cudaFreeSphereIndices(ActuatorLine* actuatorLine);
-
+    void cudaAllocTaggedFluidNodeIndices(CollisionTemplate tag, int lev);
+    void cudaCopyTaggedFluidNodeIndices(CollisionTemplate tag, int lev);
+    void cudaFreeTaggedFluidNodeIndices(CollisionTemplate tag, int lev);
+
+    // ActuatorFarm
+    void cudaAllocBladeGeometries(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeGeometriesHtoD(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeGeometriesDtoH(ActuatorFarm* actuatorFarm);
+    void cudaFreeBladeGeometries(ActuatorFarm* actuatorFarm);
+
+    void cudaAllocBladeOrientations(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeOrientationsHtoD(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeOrientationsDtoH(ActuatorFarm* actuatorFarm);
+    void cudaFreeBladeOrientations(ActuatorFarm* actuatorFarm);
+
+    void cudaAllocBladeCoords(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeCoordsHtoD(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeCoordsDtoH(ActuatorFarm* actuatorFarm);
+    void cudaFreeBladeCoords(ActuatorFarm* actuatorFarm);
+
+    void cudaAllocBladeIndices(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeIndicesHtoD(ActuatorFarm* actuatorFarm);
+    void cudaFreeBladeIndices(ActuatorFarm* actuatorFarm);
+
+    void cudaAllocBladeVelocities(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeVelocitiesHtoD(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeVelocitiesDtoH(ActuatorFarm* actuatorFarm);
+    void cudaFreeBladeVelocities(ActuatorFarm* actuatorFarm);
+
+    void cudaAllocBladeForces(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeForcesHtoD(ActuatorFarm* actuatorFarm);
+    void cudaCopyBladeForcesDtoH(ActuatorFarm* actuatorFarm);
+    void cudaFreeBladeForces(ActuatorFarm* actuatorFarm);
+
+    void cudaAllocSphereIndices(ActuatorFarm* actuatorFarm);
+    void cudaCopySphereIndicesHtoD(ActuatorFarm* actuatorFarm);
+    void cudaFreeSphereIndices(ActuatorFarm* actuatorFarm);
+    // Probes
     void cudaAllocProbeDistances(Probe* probe, int level);
     void cudaCopyProbeDistancesHtoD(Probe* probe, int level);
     void cudaCopyProbeDistancesDtoH(Probe* probe, int level);
@@ -402,6 +407,12 @@ public:
     void cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level);
     void cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level);
 
+    // Precursor Writer
+    void cudaAllocPrecursorWriter(PrecursorWriter* writer, int level);
+    void cudaCopyPrecursorWriterIndicesHtoD(PrecursorWriter* writer, int level);
+    void cudaCopyPrecursorWriterOutputVariablesDtoH(PrecursorWriter* writer, int level);
+    void cudaFreePrecursorWriter(PrecursorWriter* writer, int level);
+
 private:
     std::shared_ptr<Parameter> parameter;
     double memsizeGPU = 0.0;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
index bbce8181d814fc8b9dbb086764becb73a86c0eda..553e1f34f7993a42682605b66d53407ede9292fd 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
@@ -21,7 +21,7 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 																unsigned int* neighborY,
 																unsigned int* neighborZ,
 																real* DDStart,
-																int size_Mat,
+																unsigned long long numberOfLBnodes,
 																int level,
 																real* forces,
 																bool EvenOrOdd)
@@ -37,7 +37,7 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -48,63 +48,63 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -137,33 +137,33 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -975,7 +975,7 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														real* dxxUx,
 														real* dyyUy,
 														real* dzzUz,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -989,7 +989,7 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -1000,63 +1000,63 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1089,33 +1089,33 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -2016,7 +2016,7 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													real* coordY,
 													real* coordZ,
 													real* DDStart,
-													int size_Mat,
+													unsigned long long numberOfLBnodes,
 													bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -2030,7 +2030,7 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -2041,63 +2041,63 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -2158,33 +2158,33 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//Ship
 			real coord0X = 281.125f;//7.5f;
@@ -3238,7 +3238,7 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -3252,7 +3252,7 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -3263,63 +3263,63 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -3380,33 +3380,33 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -4510,7 +4510,7 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -4524,7 +4524,7 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -4535,63 +4535,63 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -4624,33 +4624,33 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 			unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real f_E     = (D.f[DIR_P00   ])[ke   ];// +  c2over27 ;
-			real f_W     = (D.f[DIR_M00   ])[kw   ];// +  c2over27 ;
-			real f_N     = (D.f[DIR_0P0   ])[kn   ];// +  c2over27 ;
-			real f_S     = (D.f[DIR_0M0   ])[ks   ];// +  c2over27 ;
-			real f_T     = (D.f[DIR_00P   ])[kt   ];// +  c2over27 ;
-			real f_B     = (D.f[DIR_00M   ])[kb   ];// +  c2over27 ;
-			real f_NE    = (D.f[DIR_PP0  ])[kne  ];// +  c1over54 ;
-			real f_SW    = (D.f[DIR_MM0  ])[ksw  ];// +  c1over54 ;
-			real f_SE    = (D.f[DIR_PM0  ])[kse  ];// +  c1over54 ;
-			real f_NW    = (D.f[DIR_MP0  ])[knw  ];// +  c1over54 ;
-			real f_TE    = (D.f[DIR_P0P  ])[kte  ];// +  c1over54 ;
-			real f_BW    = (D.f[DIR_M0M  ])[kbw  ];// +  c1over54 ;
-			real f_BE    = (D.f[DIR_P0M  ])[kbe  ];// +  c1over54 ;
-			real f_TW    = (D.f[DIR_M0P  ])[ktw  ];// +  c1over54 ;
-			real f_TN    = (D.f[DIR_0PP  ])[ktn  ];// +  c1over54 ;
-			real f_BS    = (D.f[DIR_0MM  ])[kbs  ];// +  c1over54 ;
-			real f_BN    = (D.f[DIR_0PM  ])[kbn  ];// +  c1over54 ;
-			real f_TS    = (D.f[DIR_0MP  ])[kts  ];// +  c1over54 ;
+			real f_E     = (D.f[DIR_P00])[ke   ];// +  c2over27 ;
+			real f_W     = (D.f[DIR_M00])[kw   ];// +  c2over27 ;
+			real f_N     = (D.f[DIR_0P0])[kn   ];// +  c2over27 ;
+			real f_S     = (D.f[DIR_0M0])[ks   ];// +  c2over27 ;
+			real f_T     = (D.f[DIR_00P])[kt   ];// +  c2over27 ;
+			real f_B     = (D.f[DIR_00M])[kb   ];// +  c2over27 ;
+			real f_NE    = (D.f[DIR_PP0])[kne  ];// +  c1over54 ;
+			real f_SW    = (D.f[DIR_MM0])[ksw  ];// +  c1over54 ;
+			real f_SE    = (D.f[DIR_PM0])[kse  ];// +  c1over54 ;
+			real f_NW    = (D.f[DIR_MP0])[knw  ];// +  c1over54 ;
+			real f_TE    = (D.f[DIR_P0P])[kte  ];// +  c1over54 ;
+			real f_BW    = (D.f[DIR_M0M])[kbw  ];// +  c1over54 ;
+			real f_BE    = (D.f[DIR_P0M])[kbe  ];// +  c1over54 ;
+			real f_TW    = (D.f[DIR_M0P])[ktw  ];// +  c1over54 ;
+			real f_TN    = (D.f[DIR_0PP])[ktn  ];// +  c1over54 ;
+			real f_BS    = (D.f[DIR_0MM])[kbs  ];// +  c1over54 ;
+			real f_BN    = (D.f[DIR_0PM])[kbn  ];// +  c1over54 ;
+			real f_TS    = (D.f[DIR_0MP])[kts  ];// +  c1over54 ;
 			real f_R     = (D.f[DIR_000])[kzero];// +  c8over27 ;
-			real f_TNE   = (D.f[DIR_PPP ])[ktne ];// +  c1over216;
-			real f_TSW   = (D.f[DIR_MMP ])[ktsw ];// +  c1over216;
-			real f_TSE   = (D.f[DIR_PMP ])[ktse ];// +  c1over216;
-			real f_TNW   = (D.f[DIR_MPP ])[ktnw ];// +  c1over216;
-			real f_BNE   = (D.f[DIR_PPM ])[kbne ];// +  c1over216;
-			real f_BSW   = (D.f[DIR_MMM ])[kbsw ];// +  c1over216;
-			real f_BSE   = (D.f[DIR_PMM ])[kbse ];// +  c1over216;
-			real f_BNW   = (D.f[DIR_MPM ])[kbnw ];// +  c1over216;
+			real f_TNE   = (D.f[DIR_PPP])[ktne ];// +  c1over216;
+			real f_TSW   = (D.f[DIR_MMP])[ktsw ];// +  c1over216;
+			real f_TSE   = (D.f[DIR_PMP])[ktse ];// +  c1over216;
+			real f_TNW   = (D.f[DIR_MPP])[ktnw ];// +  c1over216;
+			real f_BNE   = (D.f[DIR_PPM])[kbne ];// +  c1over216;
+			real f_BSW   = (D.f[DIR_MMM])[kbsw ];// +  c1over216;
+			real f_BSE   = (D.f[DIR_PMM])[kbse ];// +  c1over216;
+			real f_BNW   = (D.f[DIR_MPM])[kbnw ];// +  c1over216;
 			////////////////////////////////////////////////////////////////////////////////////
 			real fx = c0o1;
 			real fy = c0o1;
@@ -5451,7 +5451,7 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd)
@@ -5467,7 +5467,7 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -5478,63 +5478,63 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -5568,33 +5568,33 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k   ];
-			real mfabb = (D.f[DIR_M00   ])[kw  ];
-			real mfbcb = (D.f[DIR_0P0   ])[k   ];
-			real mfbab = (D.f[DIR_0M0   ])[ks  ];
-			real mfbbc = (D.f[DIR_00P   ])[k   ];
-			real mfbba = (D.f[DIR_00M   ])[kb  ];
-			real mfccb = (D.f[DIR_PP0  ])[k   ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw ];
-			real mfcab = (D.f[DIR_PM0  ])[ks  ];
-			real mfacb = (D.f[DIR_MP0  ])[kw  ];
-			real mfcbc = (D.f[DIR_P0P  ])[k   ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw ];
-			real mfcba = (D.f[DIR_P0M  ])[kb  ];
-			real mfabc = (D.f[DIR_M0P  ])[kw  ];
-			real mfbcc = (D.f[DIR_0PP  ])[k   ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs ];
-			real mfbca = (D.f[DIR_0PM  ])[kb  ];
-			real mfbac = (D.f[DIR_0MP  ])[ks  ];
+			real mfcbb = (D.f[DIR_P00])[k   ];
+			real mfabb = (D.f[DIR_M00])[kw  ];
+			real mfbcb = (D.f[DIR_0P0])[k   ];
+			real mfbab = (D.f[DIR_0M0])[ks  ];
+			real mfbbc = (D.f[DIR_00P])[k   ];
+			real mfbba = (D.f[DIR_00M])[kb  ];
+			real mfccb = (D.f[DIR_PP0])[k   ];
+			real mfaab = (D.f[DIR_MM0])[ksw ];
+			real mfcab = (D.f[DIR_PM0])[ks  ];
+			real mfacb = (D.f[DIR_MP0])[kw  ];
+			real mfcbc = (D.f[DIR_P0P])[k   ];
+			real mfaba = (D.f[DIR_M0M])[kbw ];
+			real mfcba = (D.f[DIR_P0M])[kb  ];
+			real mfabc = (D.f[DIR_M0P])[kw  ];
+			real mfbcc = (D.f[DIR_0PP])[k   ];
+			real mfbaa = (D.f[DIR_0MM])[kbs ];
+			real mfbca = (D.f[DIR_0PM])[kb  ];
+			real mfbac = (D.f[DIR_0MP])[ks  ];
 			real mfbbb = (D.f[DIR_000])[k   ];
-			real mfccc = (D.f[DIR_PPP ])[k   ];
-			real mfaac = (D.f[DIR_MMP ])[ksw ];
-			real mfcac = (D.f[DIR_PMP ])[ks  ];
-			real mfacc = (D.f[DIR_MPP ])[kw  ];
-			real mfcca = (D.f[DIR_PPM ])[kb  ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs ];
-			real mfaca = (D.f[DIR_MPM ])[kbw ];
+			real mfccc = (D.f[DIR_PPP])[k   ];
+			real mfaac = (D.f[DIR_MMP])[ksw ];
+			real mfcac = (D.f[DIR_PMP])[ks  ];
+			real mfacc = (D.f[DIR_MPP])[kw  ];
+			real mfcca = (D.f[DIR_PPM])[kb  ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs ];
+			real mfaca = (D.f[DIR_MPM])[kbw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -6349,33 +6349,33 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 					((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 			mfbbb += drho - drhoPost;
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[DIR_P00   ])[k   ] = mfabb;                                                                   
-			(D.f[DIR_M00   ])[kw  ] = mfcbb;                                                                 
-			(D.f[DIR_0P0   ])[k   ] = mfbab;
-			(D.f[DIR_0M0   ])[ks  ] = mfbcb;
-			(D.f[DIR_00P   ])[k   ] = mfbba;
-			(D.f[DIR_00M   ])[kb  ] = mfbbc;
-			(D.f[DIR_PP0  ])[k   ] = mfaab;
-			(D.f[DIR_MM0  ])[ksw ] = mfccb;
-			(D.f[DIR_PM0  ])[ks  ] = mfacb;
-			(D.f[DIR_MP0  ])[kw  ] = mfcab;
-			(D.f[DIR_P0P  ])[k   ] = mfaba;
-			(D.f[DIR_M0M  ])[kbw ] = mfcbc;
-			(D.f[DIR_P0M  ])[kb  ] = mfabc;
-			(D.f[DIR_M0P  ])[kw  ] = mfcba;
-			(D.f[DIR_0PP  ])[k   ] = mfbaa;
-			(D.f[DIR_0MM  ])[kbs ] = mfbcc;
-			(D.f[DIR_0PM  ])[kb  ] = mfbac;
-			(D.f[DIR_0MP  ])[ks  ] = mfbca;
+			(D.f[DIR_P00])[k   ] = mfabb;                                                                   
+			(D.f[DIR_M00])[kw  ] = mfcbb;                                                                 
+			(D.f[DIR_0P0])[k   ] = mfbab;
+			(D.f[DIR_0M0])[ks  ] = mfbcb;
+			(D.f[DIR_00P])[k   ] = mfbba;
+			(D.f[DIR_00M])[kb  ] = mfbbc;
+			(D.f[DIR_PP0])[k   ] = mfaab;
+			(D.f[DIR_MM0])[ksw ] = mfccb;
+			(D.f[DIR_PM0])[ks  ] = mfacb;
+			(D.f[DIR_MP0])[kw  ] = mfcab;
+			(D.f[DIR_P0P])[k   ] = mfaba;
+			(D.f[DIR_M0M])[kbw ] = mfcbc;
+			(D.f[DIR_P0M])[kb  ] = mfabc;
+			(D.f[DIR_M0P])[kw  ] = mfcba;
+			(D.f[DIR_0PP])[k   ] = mfbaa;
+			(D.f[DIR_0MM])[kbs ] = mfbcc;
+			(D.f[DIR_0PM])[kb  ] = mfbac;
+			(D.f[DIR_0MP])[ks  ] = mfbca;
 			(D.f[DIR_000])[k   ] = mfbbb;
-			(D.f[DIR_PPP ])[k   ] = mfaaa;
-			(D.f[DIR_PMP ])[ks  ] = mfaca;
-			(D.f[DIR_PPM ])[kb  ] = mfaac;
-			(D.f[DIR_PMM ])[kbs ] = mfacc;
-			(D.f[DIR_MPP ])[kw  ] = mfcaa;
-			(D.f[DIR_MMP ])[ksw ] = mfcca;
-			(D.f[DIR_MPM ])[kbw ] = mfcac;
-			(D.f[DIR_MMM ])[kbsw] = mfccc;
+			(D.f[DIR_PPP])[k   ] = mfaaa;
+			(D.f[DIR_PMP])[ks  ] = mfaca;
+			(D.f[DIR_PPM])[kb  ] = mfaac;
+			(D.f[DIR_PMM])[kbs ] = mfacc;
+			(D.f[DIR_MPP])[kw  ] = mfcaa;
+			(D.f[DIR_MMP])[ksw ] = mfcca;
+			(D.f[DIR_MPM])[kbw ] = mfcac;
+			(D.f[DIR_MMM])[kbsw] = mfccc;
 		}
 	}
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
index 97c1aff4d26cb85deaf1dd0d145245f28affc2e3..3706e5f929b50a2a72c107a982525ec3172eb144 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
@@ -51,7 +51,7 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd)
@@ -67,7 +67,7 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
 
-    if (k<size_Mat)
+    if (k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -78,63 +78,63 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
 
             ////////////////////////////////////////////////////////////////////////////////
@@ -170,33 +170,33 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
 
 
             //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
             real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
                                                ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -966,7 +966,7 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd)
@@ -982,7 +982,7 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
 
-    if (k<size_Mat)
+    if (k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -993,63 +993,63 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
 
             ////////////////////////////////////////////////////////////////////////////////
@@ -1085,33 +1085,33 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 
 
             //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
             real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
                                                ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1762,7 +1762,7 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd)
@@ -1778,7 +1778,7 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
 
-    if (k<size_Mat)
+    if (k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -1789,63 +1789,63 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             ////////////////////////////////////////////////////////////////////////////////
             //index
@@ -1857,33 +1857,33 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
             unsigned int kbs = neighborZ[ks];
             unsigned int kbsw = neighborZ[ksw];
             ////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[DIR_P00   ])[k   ];
-            real mfabb = (D.f[DIR_M00   ])[kw  ];
-            real mfbcb = (D.f[DIR_0P0   ])[k   ];
-            real mfbab = (D.f[DIR_0M0   ])[ks  ];
-            real mfbbc = (D.f[DIR_00P   ])[k   ];
-            real mfbba = (D.f[DIR_00M   ])[kb  ];
-            real mfccb = (D.f[DIR_PP0  ])[k   ];
-            real mfaab = (D.f[DIR_MM0  ])[ksw ];
-            real mfcab = (D.f[DIR_PM0  ])[ks  ];
-            real mfacb = (D.f[DIR_MP0  ])[kw  ];
-            real mfcbc = (D.f[DIR_P0P  ])[k   ];
-            real mfaba = (D.f[DIR_M0M  ])[kbw ];
-            real mfcba = (D.f[DIR_P0M  ])[kb  ];
-            real mfabc = (D.f[DIR_M0P  ])[kw  ];
-            real mfbcc = (D.f[DIR_0PP  ])[k   ];
-            real mfbaa = (D.f[DIR_0MM  ])[kbs ];
-            real mfbca = (D.f[DIR_0PM  ])[kb  ];
-            real mfbac = (D.f[DIR_0MP  ])[ks  ];
+            real mfcbb = (D.f[DIR_P00])[k   ];
+            real mfabb = (D.f[DIR_M00])[kw  ];
+            real mfbcb = (D.f[DIR_0P0])[k   ];
+            real mfbab = (D.f[DIR_0M0])[ks  ];
+            real mfbbc = (D.f[DIR_00P])[k   ];
+            real mfbba = (D.f[DIR_00M])[kb  ];
+            real mfccb = (D.f[DIR_PP0])[k   ];
+            real mfaab = (D.f[DIR_MM0])[ksw ];
+            real mfcab = (D.f[DIR_PM0])[ks  ];
+            real mfacb = (D.f[DIR_MP0])[kw  ];
+            real mfcbc = (D.f[DIR_P0P])[k   ];
+            real mfaba = (D.f[DIR_M0M])[kbw ];
+            real mfcba = (D.f[DIR_P0M])[kb  ];
+            real mfabc = (D.f[DIR_M0P])[kw  ];
+            real mfbcc = (D.f[DIR_0PP])[k   ];
+            real mfbaa = (D.f[DIR_0MM])[kbs ];
+            real mfbca = (D.f[DIR_0PM])[kb  ];
+            real mfbac = (D.f[DIR_0MP])[ks  ];
             real mfbbb = (D.f[DIR_000])[k   ];
-            real mfccc = (D.f[DIR_PPP ])[k   ];
-            real mfaac = (D.f[DIR_MMP ])[ksw ];
-            real mfcac = (D.f[DIR_PMP ])[ks  ];
-            real mfacc = (D.f[DIR_MPP ])[kw  ];
-            real mfcca = (D.f[DIR_PPM ])[kb  ];
-            real mfaaa = (D.f[DIR_MMM ])[kbsw];
-            real mfcaa = (D.f[DIR_PMM ])[kbs ];
-            real mfaca = (D.f[DIR_MPM ])[kbw ];
+            real mfccc = (D.f[DIR_PPP])[k   ];
+            real mfaac = (D.f[DIR_MMP])[ksw ];
+            real mfcac = (D.f[DIR_PMP])[ks  ];
+            real mfacc = (D.f[DIR_MPP])[kw  ];
+            real mfcca = (D.f[DIR_PPM])[kb  ];
+            real mfaaa = (D.f[DIR_MMM])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs ];
+            real mfaca = (D.f[DIR_MPM])[kbw ];
             ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -2204,33 +2204,33 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
                     ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
             mfbbb += drho - drhoPost;
             ////////////////////////////////////////////////////////////////////////////////////
-            (D.f[DIR_P00   ])[k   ] = mfabb;                                                                   
-            (D.f[DIR_M00   ])[kw  ] = mfcbb;                                                                 
-            (D.f[DIR_0P0   ])[k   ] = mfbab;
-            (D.f[DIR_0M0   ])[ks  ] = mfbcb;
-            (D.f[DIR_00P   ])[k   ] = mfbba;
-            (D.f[DIR_00M   ])[kb  ] = mfbbc;
-            (D.f[DIR_PP0  ])[k   ] = mfaab;
-            (D.f[DIR_MM0  ])[ksw ] = mfccb;
-            (D.f[DIR_PM0  ])[ks  ] = mfacb;
-            (D.f[DIR_MP0  ])[kw  ] = mfcab;
-            (D.f[DIR_P0P  ])[k   ] = mfaba;
-            (D.f[DIR_M0M  ])[kbw ] = mfcbc;
-            (D.f[DIR_P0M  ])[kb  ] = mfabc;
-            (D.f[DIR_M0P  ])[kw  ] = mfcba;
-            (D.f[DIR_0PP  ])[k   ] = mfbaa;
-            (D.f[DIR_0MM  ])[kbs ] = mfbcc;
-            (D.f[DIR_0PM  ])[kb  ] = mfbac;
-            (D.f[DIR_0MP  ])[ks  ] = mfbca;
+            (D.f[DIR_P00])[k   ] = mfabb;                                                                   
+            (D.f[DIR_M00])[kw  ] = mfcbb;                                                                 
+            (D.f[DIR_0P0])[k   ] = mfbab;
+            (D.f[DIR_0M0])[ks  ] = mfbcb;
+            (D.f[DIR_00P])[k   ] = mfbba;
+            (D.f[DIR_00M])[kb  ] = mfbbc;
+            (D.f[DIR_PP0])[k   ] = mfaab;
+            (D.f[DIR_MM0])[ksw ] = mfccb;
+            (D.f[DIR_PM0])[ks  ] = mfacb;
+            (D.f[DIR_MP0])[kw  ] = mfcab;
+            (D.f[DIR_P0P])[k   ] = mfaba;
+            (D.f[DIR_M0M])[kbw ] = mfcbc;
+            (D.f[DIR_P0M])[kb  ] = mfabc;
+            (D.f[DIR_M0P])[kw  ] = mfcba;
+            (D.f[DIR_0PP])[k   ] = mfbaa;
+            (D.f[DIR_0MM])[kbs ] = mfbcc;
+            (D.f[DIR_0PM])[kb  ] = mfbac;
+            (D.f[DIR_0MP])[ks  ] = mfbca;
             (D.f[DIR_000])[k   ] = mfbbb;
-            (D.f[DIR_PPP ])[k   ] = mfaaa;
-            (D.f[DIR_PMP ])[ks  ] = mfaca;
-            (D.f[DIR_PPM ])[kb  ] = mfaac;
-            (D.f[DIR_PMM ])[kbs ] = mfacc;
-            (D.f[DIR_MPP ])[kw  ] = mfcaa;
-            (D.f[DIR_MMP ])[ksw ] = mfcca;
-            (D.f[DIR_MPM ])[kbw ] = mfcac;
-            (D.f[DIR_MMM ])[kbsw] = mfccc;
+            (D.f[DIR_PPP])[k   ] = mfaaa;
+            (D.f[DIR_PMP])[ks  ] = mfaca;
+            (D.f[DIR_PPM])[kb  ] = mfaac;
+            (D.f[DIR_PMM])[kbs ] = mfacc;
+            (D.f[DIR_MPP])[kw  ] = mfcaa;
+            (D.f[DIR_MMP])[ksw ] = mfcca;
+            (D.f[DIR_MPM])[kbw ] = mfcac;
+            (D.f[DIR_MMM])[kbsw] = mfccc;
         }
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
index 7adfd40da157d825d83c63b084bf1f855ea6dca2..c89c3cfe87560c808d47163b45d512fa0d7e494f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
@@ -27,7 +27,7 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 															 real* vzOut,
 															 real* DDStart,
 															 real* G6,
-															 int size_Mat,
+															 unsigned long long numberOfLBnodes,
 															 int level,
 															 real* forces,
 															 bool EvenOrOdd)
@@ -43,7 +43,7 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k < size_Mat)
+	if (k < numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -54,83 +54,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes];
+				G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes];
+				G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes];
+				G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes];
+				G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes];
+				G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes];
 			}
 			else
 			{
-				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes];
+				G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes];
+				G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes];
+				G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes];
+				G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes];
+				G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1026,83 +1026,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			Distributions27 D;
 //			if (EvenOrOdd == true)
 //			{
-//				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //			else
 //			{
-//				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //
 //			Distributions6 G;
 //			if (EvenOrOdd == true)
 //			{
-//				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 //			}
 //			else
 //			{
-//				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 //			}
 //
 //			////////////////////////////////////////////////////////////////////////////////
@@ -2006,83 +2006,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			Distributions27 D;
 //			if (EvenOrOdd == true)
 //			{
-//				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //			else
 //			{
-//				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //
 //			Distributions6 G;
 //			if (EvenOrOdd == true)
 //			{
-//				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 //			}
 //			else
 //			{
-//				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 //			}
 //
 //			////////////////////////////////////////////////////////////////////////////////
@@ -2153,33 +2153,33 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			real dyyuy = c1o2 * (-mgbcb + mgbab);
 //			real dzzuz = c1o2 * (-mgbbc + mgbba);
 //			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-//			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-//			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-//			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-//			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-//			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-//			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-//			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-//			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-//			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-//			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-//			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-//			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-//			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-//			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-//			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-//			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-//			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+//			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+//			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+//			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+//			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+//			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+//			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+//			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+//			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+//			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+//			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+//			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+//			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+//			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+//			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+//			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+//			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+//			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+//			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 //			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-//			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-//			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-//			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-//			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-//			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-//			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-//			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-//			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+//			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+//			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+//			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+//			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+//			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+//			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+//			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+//			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 //			////////////////////////////////////////////////////////////////////////////////////
 //			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 //				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
index 5146242fed374a919b6dcc02774db1d8ce4f864a..0e3945829725c0614ed4da01d0bae3b99ba2720a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
@@ -17,69 +17,69 @@ __global__ void DragLiftPost27(  real* DD,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -100,24 +100,24 @@ __global__ void DragLiftPost27(  real* DD,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -160,32 +160,32 @@ __global__ void DragLiftPost27(  real* DD,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
                 f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_W    = (D.f[DIR_P00   ])[ke   ];
-		f_E    = (D.f[DIR_M00   ])[kw   ];
-		f_S    = (D.f[DIR_0P0   ])[kn   ];
-		f_N    = (D.f[DIR_0M0   ])[ks   ];
-		f_B    = (D.f[DIR_00P   ])[kt   ];
-		f_T    = (D.f[DIR_00M   ])[kb   ];
-		f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-		f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+		f_W    = (D.f[DIR_P00])[ke   ];
+		f_E    = (D.f[DIR_M00])[kw   ];
+		f_S    = (D.f[DIR_0P0])[kn   ];
+		f_N    = (D.f[DIR_0M0])[ks   ];
+		f_B    = (D.f[DIR_00P])[kt   ];
+		f_T    = (D.f[DIR_00M])[kb   ];
+		f_SW   = (D.f[DIR_PP0])[kne  ];
+		f_NE   = (D.f[DIR_MM0])[ksw  ];
+		f_NW   = (D.f[DIR_PM0])[kse  ];
+		f_SE   = (D.f[DIR_MP0])[knw  ];
+		f_BW   = (D.f[DIR_P0P])[kte  ];
+		f_TE   = (D.f[DIR_M0M])[kbw  ];
+		f_TW   = (D.f[DIR_P0M])[kbe  ];
+		f_BE   = (D.f[DIR_M0P])[ktw  ];
+		f_BS   = (D.f[DIR_0PP])[ktn  ];
+		f_TN   = (D.f[DIR_0MM])[kbs  ];
+		f_TS   = (D.f[DIR_0PM])[kbn  ];
+		f_BN   = (D.f[DIR_0MP])[kts  ];
+		f_BSW  = (D.f[DIR_PPP])[ktne ];
+		f_BNE  = (D.f[DIR_MMP])[ktsw ];
+		f_BNW  = (D.f[DIR_PMP])[ktse ];
+		f_BSE  = (D.f[DIR_MPP])[ktnw ];
+		f_TSW  = (D.f[DIR_PPM])[kbne ];
+		f_TNE  = (D.f[DIR_MMM])[kbsw ];
+		f_TNW  = (D.f[DIR_PMM])[kbse ];
+		f_TSE  = (D.f[DIR_MPM])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////
 		double	OnE   = c0o1, OnW   = c0o1, OnN   = c0o1, OnS   = c0o1, OnT = c0o1, OnB = c0o1, 
 				OnNE  = c0o1, OnSW  = c0o1, OnSE  = c0o1, OnNW  = c0o1, 
@@ -282,69 +282,69 @@ __global__ void DragLiftPre27(   real* DD,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -365,24 +365,24 @@ __global__ void DragLiftPre27(   real* DD,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -425,32 +425,32 @@ __global__ void DragLiftPre27(   real* DD,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
                 f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_E   = (D.f[DIR_P00   ])[ke   ];
-		f_W   = (D.f[DIR_M00   ])[kw   ];
-		f_N   = (D.f[DIR_0P0   ])[kn   ];
-		f_S   = (D.f[DIR_0M0   ])[ks   ];
-		f_T   = (D.f[DIR_00P   ])[kt   ];
-		f_B   = (D.f[DIR_00M   ])[kb   ];
-		f_NE  = (D.f[DIR_PP0  ])[kne  ];
-		f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-		f_SE  = (D.f[DIR_PM0  ])[kse  ];
-		f_NW  = (D.f[DIR_MP0  ])[knw  ];
-		f_TE  = (D.f[DIR_P0P  ])[kte  ];
-		f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-		f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-		f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-		f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-		f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-		f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-		f_TS  = (D.f[DIR_0MP  ])[kts  ];
-		f_TNE = (D.f[DIR_PPP ])[ktne ];
-		f_TSW = (D.f[DIR_MMP ])[ktsw ];
-		f_TSE = (D.f[DIR_PMP ])[ktse ];
-		f_TNW = (D.f[DIR_MPP ])[ktnw ];
-		f_BNE = (D.f[DIR_PPM ])[kbne ];
-		f_BSW = (D.f[DIR_MMM ])[kbsw ];
-		f_BSE = (D.f[DIR_PMM ])[kbse ];
-		f_BNW = (D.f[DIR_MPM ])[kbnw ];
+		f_E   = (D.f[DIR_P00])[ke   ];
+		f_W   = (D.f[DIR_M00])[kw   ];
+		f_N   = (D.f[DIR_0P0])[kn   ];
+		f_S   = (D.f[DIR_0M0])[ks   ];
+		f_T   = (D.f[DIR_00P])[kt   ];
+		f_B   = (D.f[DIR_00M])[kb   ];
+		f_NE  = (D.f[DIR_PP0])[kne  ];
+		f_SW  = (D.f[DIR_MM0])[ksw  ];
+		f_SE  = (D.f[DIR_PM0])[kse  ];
+		f_NW  = (D.f[DIR_MP0])[knw  ];
+		f_TE  = (D.f[DIR_P0P])[kte  ];
+		f_BW  = (D.f[DIR_M0M])[kbw  ];
+		f_BE  = (D.f[DIR_P0M])[kbe  ];
+		f_TW  = (D.f[DIR_M0P])[ktw  ];
+		f_TN  = (D.f[DIR_0PP])[ktn  ];
+		f_BS  = (D.f[DIR_0MM])[kbs  ];
+		f_BN  = (D.f[DIR_0PM])[kbn  ];
+		f_TS  = (D.f[DIR_0MP])[kts  ];
+		f_TNE = (D.f[DIR_PPP])[ktne ];
+		f_TSW = (D.f[DIR_MMP])[ktsw ];
+		f_TSE = (D.f[DIR_PMP])[ktse ];
+		f_TNW = (D.f[DIR_MPP])[ktnw ];
+		f_BNE = (D.f[DIR_PPM])[kbne ];
+		f_BSW = (D.f[DIR_MMM])[kbsw ];
+		f_BSE = (D.f[DIR_PMM])[kbse ];
+		f_BNW = (D.f[DIR_MPM])[kbnw ];
 		 ////////////////////////////////////////////////////////////////////////////////
 		double	OnE   = c0o1, OnW   = c0o1, OnN   = c0o1, OnS   = c0o1, OnT = c0o1, OnB = c0o1, 
 				OnNE  = c0o1, OnSW  = c0o1, OnSE  = c0o1, OnNW  = c0o1, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
index acd62b46c5666fc5f621c3772438e42b7ebef5c6..93879d73a32458d5403fd3fd16e68e0fcea7753d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
@@ -13,7 +13,7 @@
 
 #include <iomanip>
 
-//#include "Core/Logger/Logger.h"
+#include "cuda/CudaGrid.h"
 
 #include "Parameter/Parameter.h"
 // includes, kernels
@@ -22,7 +22,7 @@
 
 using namespace vf::lbm::constant;
 
-__global__                 void enstrophyKernel  ( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint size_Mat );
+__global__                 void enstrophyKernel  ( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, unsigned long long numberOfLBnodes );
 
 __host__ __device__ inline void enstrophyFunction( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint index );
 
@@ -32,55 +32,40 @@ bool EnstrophyAnalyzer::run(uint iter)
 {
     if( iter % this->analyzeIter != 0 ) return false;
 
-	int lev = 0;
-	int size_Mat = this->para->getParD(lev)->numberOfNodes;
-	
-	thrust::device_vector<real> enstrophy( size_Mat, c0o1 );
-    thrust::device_vector<uint> isFluid  ( size_Mat, 0);
-
-	unsigned int numberOfThreads = 128;
-    int Grid = (size_Mat / numberOfThreads)+1;
-    int Grid1, Grid2;
-    if (Grid>512)
-    {
-       Grid1 = 512;
-       Grid2 = (Grid/Grid1)+1;
-    } 
-    else
-    {
-       Grid1 = 1;
-       Grid2 = Grid;
-    }
-    dim3 grid(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1 );
-
-    LBCalcMacCompSP27<<< grid, threads >>> (para->getParD(lev)->velocityX,
-										    para->getParD(lev)->velocityY,
-										    para->getParD(lev)->velocityZ,
-										    para->getParD(lev)->rho,
-										    para->getParD(lev)->pressure,
-										    para->getParD(lev)->typeOfGridNode,
-										    para->getParD(lev)->neighborX,
-										    para->getParD(lev)->neighborY,
-										    para->getParD(lev)->neighborZ,
-										    para->getParD(lev)->numberOfNodes,
-										    para->getParD(lev)->distributions.f[0],
-										    para->getParD(lev)->isEvenTimestep); 
-	//cudaDeviceSynchronize();
-	getLastCudaError("LBCalcMacSP27 execution failed"); 
-
-	enstrophyKernel <<< grid, threads >>> ( para->getParD(lev)->velocityX,
-											para->getParD(lev)->velocityY, 
-											para->getParD(lev)->velocityZ, 
-											para->getParD(lev)->rho, 
-											para->getParD(lev)->neighborX,
-											para->getParD(lev)->neighborY,
-											para->getParD(lev)->neighborZ,
-											para->getParD(lev)->neighborInverse,
-											para->getParD(lev)->typeOfGridNode,
-											enstrophy.data().get(), 
-                                            isFluid.data().get(),
-											size_Mat);
+    int lev = 0;
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(lev)->numberofthreads, para->getParD(lev)->numberOfNodes);
+
+    thrust::device_vector<real> enstrophy( this->para->getParD(lev)->numberOfNodes, c0o1);
+    thrust::device_vector<uint> isFluid  ( this->para->getParD(lev)->numberOfNodes, 0);
+
+    LBCalcMacCompSP27<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX,
+        para->getParD(lev)->velocityY,
+        para->getParD(lev)->velocityZ,
+        para->getParD(lev)->rho,
+        para->getParD(lev)->pressure,
+        para->getParD(lev)->typeOfGridNode,
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->numberOfNodes,
+        para->getParD(lev)->distributions.f[0],
+        para->getParD(lev)->isEvenTimestep); 
+    getLastCudaError("LBCalcMacCompSP27 execution failed");
+
+    enstrophyKernel<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX,
+        para->getParD(lev)->velocityY, 
+        para->getParD(lev)->velocityZ, 
+        para->getParD(lev)->rho, 
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->neighborInverse,
+        para->getParD(lev)->typeOfGridNode,
+        enstrophy.data().get(), 
+        isFluid.data().get(),
+        para->getParD(lev)->numberOfNodes);
 	cudaDeviceSynchronize(); 
 	getLastCudaError("enstrophyKernel execution failed");
 
@@ -97,7 +82,7 @@ bool EnstrophyAnalyzer::run(uint iter)
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-__global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint size_Mat)
+__global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, unsigned long long numberOfLBnodes)
 {
     //////////////////////////////////////////////////////////////////////////
     const uint x = threadIdx.x;  // Globaler x-Index 
@@ -113,7 +98,7 @@ __global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho
 
     //if( index % 34 == 0 || index % 34 == 33 ) return;
 
-    if( index >= size_Mat) return;
+    if( index >= (uint)numberOfLBnodes) return;
 
 	unsigned int BC;
 	BC = geo[index];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
index 5470da46342c85e57370227313c8c82674a17e6e..4ced64c0152bdbbd9752f736e2edca2c51fbc2ff 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
@@ -14,7 +14,7 @@ __global__ void getSendFsPost27(real* DD,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
+                                           unsigned long long numberOfLBnodes, 
                                            bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -65,150 +65,150 @@ __global__ void getSendFsPost27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy to buffer
-      //(Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_P00   ])[ke   ];
-      //(Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_M00   ])[kw   ];
-      //(Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
-      //(Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
-      //(Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00P   ])[kt   ];
-      //(Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00M   ])[kb   ];
-      //(Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
-      //(Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
-      //(Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
-      //(Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
-      //(Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_P0P  ])[kte  ];
-      //(Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
-      //(Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
-      //(Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
-      //(Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
-      //(Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
-      //(Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
-      //(Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0MP  ])[kts  ];
+      //(Dbuff.f[DIR_P00])[k] = (D.f[DIR_P00])[ke   ];
+      //(Dbuff.f[DIR_M00])[k] = (D.f[DIR_M00])[kw   ];
+      //(Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0P0])[kn   ];
+      //(Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0M0])[ks   ];
+      //(Dbuff.f[DIR_00P])[k] = (D.f[DIR_00P])[kt   ];
+      //(Dbuff.f[DIR_00M])[k] = (D.f[DIR_00M])[kb   ];
+      //(Dbuff.f[DIR_PP0])[k] = (D.f[DIR_PP0])[kne  ];
+      //(Dbuff.f[DIR_MM0])[k] = (D.f[DIR_MM0])[ksw  ];
+      //(Dbuff.f[DIR_PM0])[k] = (D.f[DIR_PM0])[kse  ];
+      //(Dbuff.f[DIR_MP0])[k] = (D.f[DIR_MP0])[knw  ];
+      //(Dbuff.f[DIR_P0P])[k] = (D.f[DIR_P0P])[kte  ];
+      //(Dbuff.f[DIR_M0M])[k] = (D.f[DIR_M0M])[kbw  ];
+      //(Dbuff.f[DIR_P0M])[k] = (D.f[DIR_P0M])[kbe  ];
+      //(Dbuff.f[DIR_M0P])[k] = (D.f[DIR_M0P])[ktw  ];
+      //(Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0PP])[ktn  ];
+      //(Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0MM])[kbs  ];
+      //(Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0PM])[kbn  ];
+      //(Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0MP])[kts  ];
       //(Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
-      //(Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ];
-      //(Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ];
-      //(Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ];
-      //(Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ];
-      //(Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ];
-      //(Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ];
-      //(Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_PMM ])[kbse ];
-      //(Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ];
-      (Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_M00   ])[kw   ];
-      (Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_P00   ])[ke   ];
-      (Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
-      (Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
-      (Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00M   ])[kb   ];
-      (Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00P   ])[kt   ];
-      (Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
-      (Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
-      (Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
-      (Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
-      (Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
-      (Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_P0P  ])[kte  ];
-      (Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
-      (Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
-      (Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
-      (Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
-      (Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0MP  ])[kts  ];
-      (Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
+      //(Dbuff.f[DIR_PPP])[k] = (D.f[DIR_PPP])[ktne ];
+      //(Dbuff.f[DIR_MMP])[k] = (D.f[DIR_MMP])[ktsw ];
+      //(Dbuff.f[DIR_PMP])[k] = (D.f[DIR_PMP])[ktse ];
+      //(Dbuff.f[DIR_MPP])[k] = (D.f[DIR_MPP])[ktnw ];
+      //(Dbuff.f[DIR_PPM])[k] = (D.f[DIR_PPM])[kbne ];
+      //(Dbuff.f[DIR_MMM])[k] = (D.f[DIR_MMM])[kbsw ];
+      //(Dbuff.f[DIR_PMM])[k] = (D.f[DIR_PMM])[kbse ];
+      //(Dbuff.f[DIR_MPM])[k] = (D.f[DIR_MPM])[kbnw ];
+      (Dbuff.f[DIR_P00])[k] = (D.f[DIR_M00])[kw   ];
+      (Dbuff.f[DIR_M00])[k] = (D.f[DIR_P00])[ke   ];
+      (Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0M0])[ks   ];
+      (Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0P0])[kn   ];
+      (Dbuff.f[DIR_00P])[k] = (D.f[DIR_00M])[kb   ];
+      (Dbuff.f[DIR_00M])[k] = (D.f[DIR_00P])[kt   ];
+      (Dbuff.f[DIR_PP0])[k] = (D.f[DIR_MM0])[ksw  ];
+      (Dbuff.f[DIR_MM0])[k] = (D.f[DIR_PP0])[kne  ];
+      (Dbuff.f[DIR_PM0])[k] = (D.f[DIR_MP0])[knw  ];
+      (Dbuff.f[DIR_MP0])[k] = (D.f[DIR_PM0])[kse  ];
+      (Dbuff.f[DIR_P0P])[k] = (D.f[DIR_M0M])[kbw  ];
+      (Dbuff.f[DIR_M0M])[k] = (D.f[DIR_P0P])[kte  ];
+      (Dbuff.f[DIR_P0M])[k] = (D.f[DIR_M0P])[ktw  ];
+      (Dbuff.f[DIR_M0P])[k] = (D.f[DIR_P0M])[kbe  ];
+      (Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0MM])[kbs  ];
+      (Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0PP])[ktn  ];
+      (Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0MP])[kts  ];
+      (Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0PM])[kbn  ];
       (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
-      (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_MMM ])[kbsw ];
-      (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_PPM ])[kbne ];
-      (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_MPM ])[kbnw ];
-      (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_PMM ])[kbse ];
-      (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_MMP ])[ktsw ];
-      (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_PPP ])[ktne ];
-      (Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_MPP ])[ktnw ];
-      (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_PMP ])[ktse ];
+      (Dbuff.f[DIR_PPP])[k] = (D.f[DIR_MMM])[kbsw ];
+      (Dbuff.f[DIR_MMP])[k] = (D.f[DIR_PPM])[kbne ];
+      (Dbuff.f[DIR_PMP])[k] = (D.f[DIR_MPM])[kbnw ];
+      (Dbuff.f[DIR_MPP])[k] = (D.f[DIR_PMM])[kbse ];
+      (Dbuff.f[DIR_PPM])[k] = (D.f[DIR_MMP])[ktsw ];
+      (Dbuff.f[DIR_MMM])[k] = (D.f[DIR_PPP])[ktne ];
+      (Dbuff.f[DIR_PMM])[k] = (D.f[DIR_MPP])[ktnw ];
+      (Dbuff.f[DIR_MPM])[k] = (D.f[DIR_PMP])[ktse ];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -250,7 +250,7 @@ __global__ void setRecvFsPost27(real* DD,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
+                                           unsigned long long numberOfLBnodes, 
                                            bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -301,150 +301,150 @@ __global__ void setRecvFsPost27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy from buffer
-      //(D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_P00   ])[k];
-      //(D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_M00   ])[k];
-      //(D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0P0   ])[k];
-      //(D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0M0   ])[k];
-      //(D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00P   ])[k];
-      //(D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00M   ])[k];
-      //(D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_PP0  ])[k];
-      //(D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_MM0  ])[k];
-      //(D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_PM0  ])[k];
-      //(D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_MP0  ])[k];
-      //(D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_P0P  ])[k];
-      //(D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_M0M  ])[k];
-      //(D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_P0M  ])[k];
-      //(D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_M0P  ])[k];
-      //(D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0PP  ])[k];
-      //(D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0MM  ])[k];
-      //(D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0PM  ])[k];
-      //(D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0MP  ])[k];
+      //(D.f[DIR_P00])[ke   ] = (Dbuff.f[DIR_P00])[k];
+      //(D.f[DIR_M00])[kw   ] = (Dbuff.f[DIR_M00])[k];
+      //(D.f[DIR_0P0])[kn   ] = (Dbuff.f[DIR_0P0])[k];
+      //(D.f[DIR_0M0])[ks   ] = (Dbuff.f[DIR_0M0])[k];
+      //(D.f[DIR_00P])[kt   ] = (Dbuff.f[DIR_00P])[k];
+      //(D.f[DIR_00M])[kb   ] = (Dbuff.f[DIR_00M])[k];
+      //(D.f[DIR_PP0])[kne  ] = (Dbuff.f[DIR_PP0])[k];
+      //(D.f[DIR_MM0])[ksw  ] = (Dbuff.f[DIR_MM0])[k];
+      //(D.f[DIR_PM0])[kse  ] = (Dbuff.f[DIR_PM0])[k];
+      //(D.f[DIR_MP0])[knw  ] = (Dbuff.f[DIR_MP0])[k];
+      //(D.f[DIR_P0P])[kte  ] = (Dbuff.f[DIR_P0P])[k];
+      //(D.f[DIR_M0M])[kbw  ] = (Dbuff.f[DIR_M0M])[k];
+      //(D.f[DIR_P0M])[kbe  ] = (Dbuff.f[DIR_P0M])[k];
+      //(D.f[DIR_M0P])[ktw  ] = (Dbuff.f[DIR_M0P])[k];
+      //(D.f[DIR_0PP])[ktn  ] = (Dbuff.f[DIR_0PP])[k];
+      //(D.f[DIR_0MM])[kbs  ] = (Dbuff.f[DIR_0MM])[k];
+      //(D.f[DIR_0PM])[kbn  ] = (Dbuff.f[DIR_0PM])[k];
+      //(D.f[DIR_0MP])[kts  ] = (Dbuff.f[DIR_0MP])[k];
       //(D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
-      //(D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k];
-      //(D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k];
-      //(D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k];
-      //(D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP ])[k];
-      //(D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k];
-      //(D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k];
-      //(D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k];
-      //(D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k];
-      (D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_P00   ])[k];
-      (D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_M00   ])[k];
-      (D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0P0   ])[k];
-      (D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0M0   ])[k];
-      (D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00P   ])[k];
-      (D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00M   ])[k];
-      (D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_PP0  ])[k];
-      (D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_MM0  ])[k];
-      (D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_PM0  ])[k];
-      (D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_MP0  ])[k];
-      (D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_P0P  ])[k];
-      (D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_M0M  ])[k];
-      (D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_P0M  ])[k];
-      (D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_M0P  ])[k];
-      (D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0PP  ])[k];
-      (D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0MM  ])[k];
-      (D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0PM  ])[k];
-      (D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0MP  ])[k];
+      //(D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_PPP])[k];
+      //(D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_MMP])[k];
+      //(D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_PMP])[k];
+      //(D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_MPP])[k];
+      //(D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_PPM])[k];
+      //(D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_MMM])[k];
+      //(D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_PMM])[k];
+      //(D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_MPM])[k];
+      (D.f[DIR_M00])[kw   ] = (Dbuff.f[DIR_P00])[k];
+      (D.f[DIR_P00])[ke   ] = (Dbuff.f[DIR_M00])[k];
+      (D.f[DIR_0M0])[ks   ] = (Dbuff.f[DIR_0P0])[k];
+      (D.f[DIR_0P0])[kn   ] = (Dbuff.f[DIR_0M0])[k];
+      (D.f[DIR_00M])[kb   ] = (Dbuff.f[DIR_00P])[k];
+      (D.f[DIR_00P])[kt   ] = (Dbuff.f[DIR_00M])[k];
+      (D.f[DIR_MM0])[ksw  ] = (Dbuff.f[DIR_PP0])[k];
+      (D.f[DIR_PP0])[kne  ] = (Dbuff.f[DIR_MM0])[k];
+      (D.f[DIR_MP0])[knw  ] = (Dbuff.f[DIR_PM0])[k];
+      (D.f[DIR_PM0])[kse  ] = (Dbuff.f[DIR_MP0])[k];
+      (D.f[DIR_M0M])[kbw  ] = (Dbuff.f[DIR_P0P])[k];
+      (D.f[DIR_P0P])[kte  ] = (Dbuff.f[DIR_M0M])[k];
+      (D.f[DIR_M0P])[ktw  ] = (Dbuff.f[DIR_P0M])[k];
+      (D.f[DIR_P0M])[kbe  ] = (Dbuff.f[DIR_M0P])[k];
+      (D.f[DIR_0MM])[kbs  ] = (Dbuff.f[DIR_0PP])[k];
+      (D.f[DIR_0PP])[ktn  ] = (Dbuff.f[DIR_0MM])[k];
+      (D.f[DIR_0MP])[kts  ] = (Dbuff.f[DIR_0PM])[k];
+      (D.f[DIR_0PM])[kbn  ] = (Dbuff.f[DIR_0MP])[k];
       (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
-      (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_PPP ])[k];
-      (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_MMP ])[k];
-      (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_PMP ])[k];
-      (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_MPP ])[k];
-      (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_PPM ])[k];
-      (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_MMM ])[k];
-      (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_PMM ])[k];
-      (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_MPM ])[k];
+      (D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_PPP])[k];
+      (D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_MMP])[k];
+      (D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_PMP])[k];
+      (D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_MPP])[k];
+      (D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_PPM])[k];
+      (D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_MMM])[k];
+      (D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_PMM])[k];
+      (D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_MPM])[k];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -485,7 +485,7 @@ __global__ void getSendFsPre27(real* DD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat, 
+                                          unsigned long long numberOfLBnodes, 
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -536,123 +536,123 @@ __global__ void getSendFsPre27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy to buffer
-      (Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_P00   ])[ke   ];
-      (Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_M00   ])[kw   ];
-      (Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
-      (Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
-      (Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00P   ])[kt   ];
-      (Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00M   ])[kb   ];
-      (Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
-      (Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
-      (Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
-      (Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
-      (Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_P0P  ])[kte  ];
-      (Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
-      (Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
-      (Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
-      (Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
-      (Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
-      (Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
-      (Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0MP  ])[kts  ];
+      (Dbuff.f[DIR_P00])[k] = (D.f[DIR_P00])[ke   ];
+      (Dbuff.f[DIR_M00])[k] = (D.f[DIR_M00])[kw   ];
+      (Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0P0])[kn   ];
+      (Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0M0])[ks   ];
+      (Dbuff.f[DIR_00P])[k] = (D.f[DIR_00P])[kt   ];
+      (Dbuff.f[DIR_00M])[k] = (D.f[DIR_00M])[kb   ];
+      (Dbuff.f[DIR_PP0])[k] = (D.f[DIR_PP0])[kne  ];
+      (Dbuff.f[DIR_MM0])[k] = (D.f[DIR_MM0])[ksw  ];
+      (Dbuff.f[DIR_PM0])[k] = (D.f[DIR_PM0])[kse  ];
+      (Dbuff.f[DIR_MP0])[k] = (D.f[DIR_MP0])[knw  ];
+      (Dbuff.f[DIR_P0P])[k] = (D.f[DIR_P0P])[kte  ];
+      (Dbuff.f[DIR_M0M])[k] = (D.f[DIR_M0M])[kbw  ];
+      (Dbuff.f[DIR_P0M])[k] = (D.f[DIR_P0M])[kbe  ];
+      (Dbuff.f[DIR_M0P])[k] = (D.f[DIR_M0P])[ktw  ];
+      (Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0PP])[ktn  ];
+      (Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0MM])[kbs  ];
+      (Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0PM])[kbn  ];
+      (Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0MP])[kts  ];
       (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
-      (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ];
-      (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ];
-      (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ];
-      (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ];
-      (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ];
-      (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ];
-      (Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_PMM ])[kbse ];
-      (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ];
+      (Dbuff.f[DIR_PPP])[k] = (D.f[DIR_PPP])[ktne ];
+      (Dbuff.f[DIR_MMP])[k] = (D.f[DIR_MMP])[ktsw ];
+      (Dbuff.f[DIR_PMP])[k] = (D.f[DIR_PMP])[ktse ];
+      (Dbuff.f[DIR_MPP])[k] = (D.f[DIR_MPP])[ktnw ];
+      (Dbuff.f[DIR_PPM])[k] = (D.f[DIR_PPM])[kbne ];
+      (Dbuff.f[DIR_MMM])[k] = (D.f[DIR_MMM])[kbsw ];
+      (Dbuff.f[DIR_PMM])[k] = (D.f[DIR_PMM])[kbse ];
+      (Dbuff.f[DIR_MPM])[k] = (D.f[DIR_MPM])[kbnw ];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -694,7 +694,7 @@ __global__ void setRecvFsPre27(real* DD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat, 
+                                          unsigned long long numberOfLBnodes, 
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -745,123 +745,123 @@ __global__ void setRecvFsPre27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy from buffer
-      (D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_P00   ])[k];
-      (D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_M00   ])[k];
-      (D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0P0   ])[k];
-      (D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0M0   ])[k];
-      (D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00P   ])[k];
-      (D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00M   ])[k];
-      (D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_PP0  ])[k];
-      (D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_MM0  ])[k];
-      (D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_PM0  ])[k];
-      (D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_MP0  ])[k];
-      (D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_P0P  ])[k];
-      (D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_M0M  ])[k];
-      (D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_P0M  ])[k];
-      (D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_M0P  ])[k];
-      (D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0PP  ])[k];
-      (D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0MM  ])[k];
-      (D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0PM  ])[k];
-      (D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0MP  ])[k];
+      (D.f[DIR_P00])[ke   ] = (Dbuff.f[DIR_P00])[k];
+      (D.f[DIR_M00])[kw   ] = (Dbuff.f[DIR_M00])[k];
+      (D.f[DIR_0P0])[kn   ] = (Dbuff.f[DIR_0P0])[k];
+      (D.f[DIR_0M0])[ks   ] = (Dbuff.f[DIR_0M0])[k];
+      (D.f[DIR_00P])[kt   ] = (Dbuff.f[DIR_00P])[k];
+      (D.f[DIR_00M])[kb   ] = (Dbuff.f[DIR_00M])[k];
+      (D.f[DIR_PP0])[kne  ] = (Dbuff.f[DIR_PP0])[k];
+      (D.f[DIR_MM0])[ksw  ] = (Dbuff.f[DIR_MM0])[k];
+      (D.f[DIR_PM0])[kse  ] = (Dbuff.f[DIR_PM0])[k];
+      (D.f[DIR_MP0])[knw  ] = (Dbuff.f[DIR_MP0])[k];
+      (D.f[DIR_P0P])[kte  ] = (Dbuff.f[DIR_P0P])[k];
+      (D.f[DIR_M0M])[kbw  ] = (Dbuff.f[DIR_M0M])[k];
+      (D.f[DIR_P0M])[kbe  ] = (Dbuff.f[DIR_P0M])[k];
+      (D.f[DIR_M0P])[ktw  ] = (Dbuff.f[DIR_M0P])[k];
+      (D.f[DIR_0PP])[ktn  ] = (Dbuff.f[DIR_0PP])[k];
+      (D.f[DIR_0MM])[kbs  ] = (Dbuff.f[DIR_0MM])[k];
+      (D.f[DIR_0PM])[kbn  ] = (Dbuff.f[DIR_0PM])[k];
+      (D.f[DIR_0MP])[kts  ] = (Dbuff.f[DIR_0MP])[k];
       (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
-      (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k];
-      (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k];
-      (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k];
-      (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP ])[k];
-      (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k];
-      (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k];
-      (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k];
-      (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k];
+      (D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_PPP])[k];
+      (D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_MMP])[k];
+      (D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_PMP])[k];
+      (D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_MPP])[k];
+      (D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_PPM])[k];
+      (D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_MMM])[k];
+      (D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_PMM])[k];
+      (D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_MPM])[k];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -903,7 +903,7 @@ __global__ void getSendGsF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -931,31 +931,31 @@ __global__ void getSendGsF3(
 		Distributions6 G;
 		if (isEvenTimestep)
 		{
-			G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		else
 		{
-			G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//set Pointer for Buffer Gs
 		Distributions6 Dbuff;
-		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00   *buffmax];
-		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00   *buffmax];
-		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0   *buffmax];
-		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0   *buffmax];
-		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P   *buffmax];
-		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M   *buffmax];
+		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 * buffmax];
+		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 * buffmax];
+		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0 * buffmax];
+		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 * buffmax];
+		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P * buffmax];
+		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M * buffmax];
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//write Gs to buffer
 		(Dbuff.g[DIR_P00])[k] = (G.g[DIR_M00])[kw];
@@ -1006,7 +1006,7 @@ __global__ void setRecvGsF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -1034,31 +1034,31 @@ __global__ void setRecvGsF3(
 		Distributions6 G;
 		if (isEvenTimestep)
 		{
-			G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		else
 		{
-			G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//set Pointer for Buffer Gs
 		Distributions6 Dbuff;
-		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00   *buffmax];
-		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00   *buffmax];
-		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0   *buffmax];
-		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0   *buffmax];
-		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P   *buffmax];
-		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M   *buffmax];
+		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 * buffmax];
+		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 * buffmax];
+		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0 * buffmax];
+		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 * buffmax];
+		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P * buffmax];
+		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M * buffmax];
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//write buffer to Gs
 		(G.g[DIR_M00])[kw] = (Dbuff.g[DIR_P00])[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index ee987ae23402ef304220349db77084cc341ccd5a..ae8cbb77ec2493126d64b90a7119cbfa3efee666 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -29,7 +29,7 @@ void KernelCas27(unsigned int grid_nx,
                             unsigned int* neighborY,
                             unsigned int* neighborZ,
                             real* DD,
-                            int size_Mat,
+                            unsigned long long numberOfLBnodes,
                             bool EvenOrOdd);
 
 void KernelCasSP27(unsigned int numberOfThreads, 
@@ -39,7 +39,7 @@ void KernelCasSP27(unsigned int numberOfThreads,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
                               real* DD,
-                              int size_Mat,
+                              unsigned long long numberOfLBnodes,
                               bool EvenOrOdd);
 
 void KernelCasSPMS27(unsigned int numberOfThreads, 
@@ -49,7 +49,7 @@ void KernelCasSPMS27(unsigned int numberOfThreads,
                                 unsigned int* neighborY,
                                 unsigned int* neighborZ,
                                 real* DD,
-                                int size_Mat,
+                                unsigned long long numberOfLBnodes,
                                 bool EvenOrOdd);
 
 void KernelCasSPMSOHM27( unsigned int numberOfThreads, 
@@ -59,7 +59,7 @@ void KernelCasSPMSOHM27( unsigned int numberOfThreads,
                                    unsigned int* neighborY,
                                    unsigned int* neighborZ,
                                    real* DD,
-                                   int size_Mat,
+                                   unsigned long long numberOfLBnodes,
                                    bool EvenOrOdd);
 
 void KernelKumCompSRTSP27(
@@ -70,7 +70,7 @@ void KernelKumCompSRTSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd);
@@ -82,7 +82,7 @@ void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
 									    unsigned int* neighborY,
 									    unsigned int* neighborZ,
 									    real* DD,
-									    int size_Mat,
+									    unsigned long long numberOfLBnodes,
 									    int level,
 									    real* forces,
 									    bool EvenOrOdd);
@@ -94,7 +94,7 @@ void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
 											real* DD,
-											int size_Mat,
+											unsigned long long numberOfLBnodes,
 											int size_Array,
 											int level,
 											real* forces,
@@ -112,7 +112,7 @@ void KernelKum1hSP27(    unsigned int numberOfThreads,
 									real* coordY,
 									real* coordZ,
 									real* DDStart,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 void KernelCascadeSP27(unsigned int numberOfThreads, 
@@ -122,7 +122,7 @@ void KernelCascadeSP27(unsigned int numberOfThreads,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
 								  real* DD,
-								  int size_Mat,
+								  unsigned long long numberOfLBnodes,
 								  bool EvenOrOdd);
 
 void KernelKumNewSP27(   unsigned int numberOfThreads, 
@@ -132,7 +132,7 @@ void KernelKumNewSP27(   unsigned int numberOfThreads,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
 									real* DD,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 
@@ -144,7 +144,7 @@ void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -158,7 +158,7 @@ void CumulantOnePreconditionedChimCompSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -172,7 +172,7 @@ void CumulantOneChimCompSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -189,7 +189,7 @@ void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 									 real* dxxUx,
 									 real* dyyUy,
 									 real* dzzUz,
-									 int size_Mat,
+									 unsigned long long numberOfLBnodes,
 									 bool EvenOrOdd);
 
 void KernelKumCompSP27(  unsigned int numberOfThreads, 
@@ -199,7 +199,7 @@ void KernelKumCompSP27(  unsigned int numberOfThreads,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
 									real* DD,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 void KernelWaleBySoniMalavCumAA2016CompSP27(
@@ -215,7 +215,7 @@ void KernelWaleBySoniMalavCumAA2016CompSP27(
 	real* veloZ,
 	real* DD,
 	real* turbulentViscosity,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -227,7 +227,7 @@ void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 									   unsigned int* neighborY,
 									   unsigned int* neighborZ,
 									   real* DD,
-									   int size_Mat,
+									   unsigned long long numberOfLBnodes,
 									   int level,
 									   real* forces,
 									   real porosity,
@@ -245,7 +245,7 @@ void KernelADincomp7(   unsigned int numberOfThreads,
 								   unsigned int* neighborZ,
 								   real* DD,
 								   real* DD7,
-								   int size_Mat,
+								   unsigned long long numberOfLBnodes,
 								   bool EvenOrOdd);
 
 void KernelADincomp27(   unsigned int numberOfThreads, 
@@ -256,7 +256,7 @@ void KernelADincomp27(   unsigned int numberOfThreads,
 									unsigned int* neighborZ,
 									real* DD,
 									real* DD7,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 void Init27(int myid,
@@ -267,7 +267,7 @@ void Init27(int myid,
                        unsigned int* neighborY,
                        unsigned int* neighborZ,
                        real* vParab,
-                       unsigned int size_Mat,
+                       unsigned long long numberOfLBnodes,
                        unsigned int grid_nx, 
                        unsigned int grid_ny, 
                        unsigned int grid_nz, 
@@ -285,7 +285,7 @@ void InitNonEqPartSP27(unsigned int numberOfThreads,
                                   real* ux,
                                   real* uy,
                                   real* uz,
-                                  unsigned int size_Mat,
+                                  unsigned long long numberOfLBnodes,
                                   real* DD,
                                   real omega,
                                   bool EvenOrOdd);
@@ -300,7 +300,7 @@ void InitThS7(  unsigned int numberOfThreads,
                            real* ux,
                            real* uy,
                            real* uz,
-                           unsigned int size_Mat,
+                           unsigned long long numberOfLBnodes,
                            real* DD7,
                            bool EvenOrOdd);
 
@@ -313,7 +313,7 @@ void InitADDev27( unsigned int numberOfThreads,
                            real* ux,
                            real* uy,
                            real* uz,
-                           unsigned int size_Mat,
+                           unsigned long long numberOfLBnodes,
                            real* DD27,
                            bool EvenOrOdd);
 
@@ -330,7 +330,7 @@ void PostProcessorF3_2018Fehlberg(
 	real* vzOut,
 	real* DDStart,
 	real* G6,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd);
@@ -343,7 +343,7 @@ void CalcMac27( real* vxD,
                           unsigned int* neighborX,
                           unsigned int* neighborY,
                           unsigned int* neighborZ,
-                          unsigned int size_Mat,
+                          unsigned long long numberOfLBnodes,
                           unsigned int grid_nx, 
                           unsigned int grid_ny, 
                           unsigned int grid_nz, 
@@ -359,7 +359,7 @@ void CalcMacSP27(real* vxD,
                             unsigned int* neighborX,
                             unsigned int* neighborY,
                             unsigned int* neighborZ,
-                            unsigned int size_Mat,
+                            unsigned long long numberOfLBnodes,
                             unsigned int numberOfThreads, 
                             real* DD,
                             bool isEvenTimestep);
@@ -373,7 +373,7 @@ void CalcMacCompSP27(real* vxD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat,
+								unsigned long long numberOfLBnodes,
 								unsigned int numberOfThreads, 
 								real* DD,
 								bool isEvenTimestep);
@@ -383,7 +383,7 @@ void CalcMacThS7(  real* Conc,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat,
+                              unsigned long long numberOfLBnodes,
                               unsigned int numberOfThreads, 
                               real* DD7,
                               bool isEvenTimestep);
@@ -395,7 +395,7 @@ void PlaneConcThS7(real* Conc,
 							  unsigned int* neighborX,
 							  unsigned int* neighborY,
 							  unsigned int* neighborZ,
-							  unsigned int size_Mat,
+							  unsigned long long numberOfLBnodes,
 							  unsigned int numberOfThreads, 
 							  real* DD7,
 							  bool isEvenTimestep);
@@ -407,7 +407,7 @@ void PlaneConcThS27(real* Conc,
 							   unsigned int* neighborX,
 							   unsigned int* neighborY,
 							   unsigned int* neighborZ,
-							   unsigned int size_Mat,
+							   unsigned long long numberOfLBnodes,
 							   unsigned int numberOfThreads, 
 							   real* DD27,
 							   bool isEvenTimestep);
@@ -418,7 +418,7 @@ void CalcConcentration27( unsigned int numberOfThreads,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int size_Mat,
+                                     unsigned long long numberOfLBnodes,
                                      real* DD27,
                                      bool isEvenTimestep);
 
@@ -431,7 +431,7 @@ void CalcMedSP27(  real* vxD,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat,
+                              unsigned long long numberOfLBnodes,
                               unsigned int numberOfThreads, 
                               real* DD,
                               bool isEvenTimestep);
@@ -445,7 +445,7 @@ void CalcMedCompSP27(real* vxD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat,
+								unsigned long long numberOfLBnodes,
 								unsigned int numberOfThreads, 
 								real* DD,
 								bool isEvenTimestep);
@@ -461,7 +461,7 @@ void CalcMedCompAD27(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	unsigned int numberOfThreads,
 	real* DD,
 	real* DD_AD,
@@ -477,7 +477,7 @@ void CalcMacMedSP27(  real* vxD,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
                                  unsigned int tdiff,
-                                 unsigned int size_Mat,
+                                 unsigned long long numberOfLBnodes,
                                  unsigned int numberOfThreads, 
                                  bool isEvenTimestep);
 
@@ -487,7 +487,7 @@ void ResetMedianValuesSP27(
 	real* vzD,
 	real* rhoD,
 	real* pressD,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
@@ -498,7 +498,7 @@ void ResetMedianValuesAD27(
 	real* rhoD,
 	real* pressD,
 	real* concD,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
@@ -511,7 +511,7 @@ void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 unsigned int* neighborX,
 										 unsigned int* neighborY,
 										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
+										 unsigned long long numberOfLBnodes,
 										 unsigned int numberOfThreads, 
 										 real* DD,
 										 bool isEvenTimestep);
@@ -525,7 +525,7 @@ void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
 									   unsigned int* neighborZ,
-									   unsigned int size_Mat,
+									   unsigned long long numberOfLBnodes,
 									   unsigned int numberOfThreads, 
 									   real* DD,
 									   bool isEvenTimestep);
@@ -541,7 +541,7 @@ void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 unsigned int* neighborX,
 										 unsigned int* neighborY,
 										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
+										 unsigned long long numberOfLBnodes,
 										 unsigned int numberOfThreads, 
 										 real* DD,
 										 bool isEvenTimestep);
@@ -557,7 +557,7 @@ void Calc3rdMomentsCompSP27(real* CUMbbb,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
 									   unsigned int* neighborZ,
-									   unsigned int size_Mat,
+									   unsigned long long numberOfLBnodes,
 									   unsigned int numberOfThreads, 
 									   real* DD,
 									   bool isEvenTimestep);
@@ -576,7 +576,7 @@ void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat,
+											unsigned long long numberOfLBnodes,
 											unsigned int numberOfThreads, 
 											real* DD,
 											bool isEvenTimestep);
@@ -595,7 +595,7 @@ void CalcHigherMomentsCompSP27(real* CUMcbb,
 										  unsigned int* neighborX,
 										  unsigned int* neighborY,
 										  unsigned int* neighborZ,
-										  unsigned int size_Mat,
+										  unsigned long long numberOfLBnodes,
 										  unsigned int numberOfThreads, 
 										  real* DD,
 										  bool isEvenTimestep);
@@ -612,7 +612,7 @@ void LBCalcMeasurePoints27(real* vxMP,
                                       unsigned int* neighborX,
                                       unsigned int* neighborY,
                                       unsigned int* neighborZ,
-                                      unsigned int size_Mat,
+                                      unsigned long long numberOfLBnodes,
                                       real* DD,
                                       unsigned int numberOfThreads, 
                                       bool isEvenTimestep);
@@ -627,7 +627,7 @@ void BcPress27(int nx,
                           unsigned int* neighborY,
                           unsigned int* neighborZ,
                           real* DD, 
-                          unsigned int size_Mat, 
+                          unsigned long long numberOfLBnodes, 
                           bool isEvenTimestep);
 
 void BcVel27(int nx, 
@@ -641,7 +641,7 @@ void BcVel27(int nx,
                         unsigned int* neighborY,
                         unsigned int* neighborZ,
                         real* DD, 
-                        unsigned int size_Mat, 
+                        unsigned long long numberOfLBnodes, 
                         bool isEvenTimestep, 
                         real u0x, 
                         real om);
@@ -661,7 +661,7 @@ void QDevCompThinWalls27(unsigned int numberOfThreads,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
 									unsigned int* neighborWSB,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -675,7 +675,7 @@ void QDevIncompHighNu27(  unsigned int numberOfThreads,
 									 unsigned int* neighborX,
 									 unsigned int* neighborY,
 									 unsigned int* neighborZ,
-									 unsigned int size_Mat, 
+									 unsigned long long numberOfLBnodes, 
 									 bool isEvenTimestep);
 
 void QDevCompHighNu27(unsigned int numberOfThreads,
@@ -687,7 +687,7 @@ void QDevCompHighNu27(unsigned int numberOfThreads,
 								 unsigned int* neighborX,
 								 unsigned int* neighborY,
 								 unsigned int* neighborZ,
-								 unsigned int size_Mat, 
+								 unsigned long long numberOfLBnodes, 
 								 bool isEvenTimestep);
 
 void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -704,7 +704,7 @@ void QVelDeviceCouette27(unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QVelDevice1h27( unsigned int numberOfThreads,
@@ -726,7 +726,7 @@ void QVelDevice1h27( unsigned int numberOfThreads,
 								real* coordX,
 								real* coordY,
 								real* coordZ,
-								unsigned int size_Mat, 
+								unsigned long long numberOfLBnodes, 
 								bool isEvenTimestep);
 
 void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -743,7 +743,7 @@ void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  unsigned int* neighborX,
 									  unsigned int* neighborY,
 									  unsigned int* neighborZ,
-									  unsigned int size_Mat, 
+									  unsigned long long numberOfLBnodes, 
 									  bool isEvenTimestep);
 
 void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -762,7 +762,7 @@ void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           unsigned int* neighborY,
 							           unsigned int* neighborZ,
 									   unsigned int* neighborWSB,
-							           unsigned int size_Mat, 
+							           unsigned long long numberOfLBnodes, 
 							           bool isEvenTimestep);
 
 void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -779,7 +779,7 @@ void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
 										unsigned int* neighborX,
 										unsigned int* neighborY,
 										unsigned int* neighborZ,
-										unsigned int size_Mat, 
+										unsigned long long numberOfLBnodes, 
 										bool isEvenTimestep);
 
 void QVelDevCompHighNu27(unsigned int numberOfThreads,
@@ -794,7 +794,7 @@ void QVelDevCompHighNu27(unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QVeloDevEQ27(unsigned int numberOfThreads,
@@ -808,7 +808,7 @@ void QVeloDevEQ27(unsigned int numberOfThreads,
 							 unsigned int* neighborX,
 							 unsigned int* neighborY,
 							 unsigned int* neighborZ,
-							 unsigned int size_Mat, 
+							 unsigned long long numberOfLBnodes, 
 							 bool isEvenTimestep);
 
 void QVeloStreetDevEQ27(
@@ -848,7 +848,7 @@ void QSlipGeomDevComp27( unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QSlipNormDevComp27(unsigned int numberOfThreads,
@@ -863,7 +863,7 @@ void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
@@ -883,7 +883,7 @@ void QPressDevFixBackflow27(unsigned int numberOfThreads,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat, 
+                                       unsigned long long numberOfLBnodes, 
                                        bool isEvenTimestep);
 
 void QPressDevDirDepBot27(unsigned int numberOfThreads,
@@ -895,11 +895,13 @@ void QPressDevDirDepBot27(unsigned int numberOfThreads,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int size_Mat, 
+                                     unsigned long long numberOfLBnodes, 
                                      bool isEvenTimestep);
 
 void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
+void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); // declaration only; same parameter list as QPressNoRhoDev27
+
 void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
 void QPressDevOld27(unsigned int numberOfThreads,
@@ -912,7 +914,7 @@ void QPressDevOld27(unsigned int numberOfThreads,
                                unsigned int* neighborX,
                                unsigned int* neighborY,
                                unsigned int* neighborZ,
-                               unsigned int size_Mat, 
+                               unsigned long long numberOfLBnodes, 
                                bool isEvenTimestep);
 
 void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -928,7 +930,7 @@ void QPressDevZero27(unsigned int numberOfThreads,
                                 unsigned int* neighborX,
                                 unsigned int* neighborY,
                                 unsigned int* neighborZ,
-                                unsigned int size_Mat, 
+                                unsigned long long numberOfLBnodes, 
                                 bool isEvenTimestep);
 
 void QPressDevFake27(   unsigned int numberOfThreads,
@@ -941,7 +943,7 @@ void QPressDevFake27(   unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -956,7 +958,7 @@ void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QPressDevAntiBB27(  unsigned int numberOfThreads,
@@ -972,7 +974,7 @@ void QPressDevAntiBB27(  unsigned int numberOfThreads,
 								  unsigned int* neighborX,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
-								  unsigned int size_Mat, 
+								  unsigned long long numberOfLBnodes, 
 								  bool isEvenTimestep);
 
 void PressSchlaffer27(unsigned int numberOfThreads,
@@ -989,7 +991,7 @@ void PressSchlaffer27(unsigned int numberOfThreads,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat, 
+                                 unsigned long long numberOfLBnodes, 
                                  bool isEvenTimestep);
 
 void VelSchlaffer27(  unsigned int numberOfThreads,
@@ -1004,9 +1006,17 @@ void VelSchlaffer27(  unsigned int numberOfThreads,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat, 
+                                 unsigned long long numberOfLBnodes, 
                                  bool isEvenTimestep);
 
+void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); // declaration only; NOTE(review): meaning and valid range of tRatio / velocityRatio are not visible here - confirm at the definition
+
+void PrecursorDevEQ27(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); // declaration only; same parameter list as QPrecursorDevCompZeroPress
+
+void PrecursorDevDistributions(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); // declaration only; same parameter list as QPrecursorDevCompZeroPress
+
+void QPrecursorDevDistributions(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); // declaration only; same parameter list as QPrecursorDevCompZeroPress
+
 void QADDev7(unsigned int numberOfThreads,
                         real* DD, 
                         real* DD7,
@@ -1019,7 +1029,7 @@ void QADDev7(unsigned int numberOfThreads,
                         unsigned int* neighborX,
                         unsigned int* neighborY,
                         unsigned int* neighborZ,
-                        unsigned int size_Mat, 
+                        unsigned long long numberOfLBnodes, 
                         bool isEvenTimestep);
 
 //////////////////////////////////////////////////////////////////////////
@@ -1033,7 +1043,7 @@ void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 	uint* neighborZ,
 	real* distributions,
 	real* distributionsAD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* forces,
 	bool isEvenTimestep);
 
@@ -1053,7 +1063,7 @@ void ADSlipVelDevComp(
 	uint * neighborX,
 	uint * neighborY,
 	uint * neighborZ,
-	uint size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep);
 	
 void QADDirichletDev27( unsigned int numberOfThreads,
@@ -1068,7 +1078,7 @@ void QADDirichletDev27( unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 void QADBBDev27(  unsigned int numberOfThreads,
@@ -1083,7 +1093,7 @@ void QADBBDev27(  unsigned int numberOfThreads,
 							 unsigned int* neighborX,
 							 unsigned int* neighborY,
 							 unsigned int* neighborZ,
-							 unsigned int size_Mat, 
+							 unsigned long long numberOfLBnodes, 
 							 bool isEvenTimestep);
 
 void QADVelDev7(unsigned int numberOfThreads,
@@ -1099,7 +1109,7 @@ void QADVelDev7(unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
-                           unsigned int size_Mat, 
+                           unsigned long long numberOfLBnodes, 
                            bool isEvenTimestep);
 
 
@@ -1116,7 +1126,7 @@ void QADVelDev27(  unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat, 
+                              unsigned long long numberOfLBnodes, 
                               bool isEvenTimestep);
 
 void QADPressDev7( unsigned int numberOfThreads,
@@ -1132,7 +1142,7 @@ void QADPressDev7( unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat, 
+                              unsigned long long numberOfLBnodes, 
                               bool isEvenTimestep);
 
 void QADPressDev27(unsigned int numberOfThreads,
@@ -1148,7 +1158,7 @@ void QADPressDev27(unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat, 
+                              unsigned long long numberOfLBnodes, 
                               bool isEvenTimestep);
 
 void QADPressNEQNeighborDev27(
@@ -1161,7 +1171,7 @@ void QADPressNEQNeighborDev27(
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat,
+											unsigned long long numberOfLBnodes,
 											bool isEvenTimestep
 										);
 
@@ -1177,7 +1187,7 @@ void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QNoSlipADincompDev27(unsigned int numberOfThreads,
@@ -1192,7 +1202,7 @@ void QNoSlipADincompDev27(unsigned int numberOfThreads,
 									 unsigned int* neighborX,
 									 unsigned int* neighborY,
 									 unsigned int* neighborZ,
-									 unsigned int size_Mat, 
+									 unsigned long long numberOfLBnodes, 
 									 bool isEvenTimestep);
 
 void QADVeloIncompDev7( unsigned int numberOfThreads,
@@ -1208,7 +1218,7 @@ void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 
@@ -1225,7 +1235,7 @@ void QADVeloIncompDev27( unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QADPressIncompDev7(  unsigned int numberOfThreads,
@@ -1241,7 +1251,7 @@ void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 unsigned int* neighborX,
 									 unsigned int* neighborY,
 									 unsigned int* neighborZ,
-									 unsigned int size_Mat, 
+									 unsigned long long numberOfLBnodes, 
 									 bool isEvenTimestep);
 
 void QADPressIncompDev27(  unsigned int numberOfThreads,
@@ -1257,7 +1267,7 @@ void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  unsigned int* neighborX,
 									  unsigned int* neighborY,
 									  unsigned int* neighborZ,
-									  unsigned int size_Mat, 
+									  unsigned long long numberOfLBnodes, 
 									  bool isEvenTimestep);
 
 void PropVelo(   unsigned int numberOfThreads,
@@ -1270,7 +1280,7 @@ void PropVelo(   unsigned int numberOfThreads,
 							real* uz,
 							int* k_Q, 
 							unsigned int size_Prop,
-							unsigned int size_Mat,
+							unsigned long long numberOfLBnodes,
 							unsigned int* bcMatD,
 							real* DD,
 							bool EvenOrOdd);
@@ -1283,8 +1293,8 @@ void ScaleCF27( real* DC,
                            unsigned int* neighborFX,
                            unsigned int* neighborFY,
                            unsigned int* neighborFZ,
-                           unsigned int size_MatC, 
-                           unsigned int size_MatF, 
+                           unsigned long long numberOfLBnodesC, 
+                           unsigned long long numberOfLBnodesF, 
                            bool isEvenTimestep,
                            unsigned int* posCSWB, 
                            unsigned int* posFSWB, 
@@ -1306,8 +1316,8 @@ void ScaleFC27( real* DC,
                            unsigned int* neighborFX,
                            unsigned int* neighborFY,
                            unsigned int* neighborFZ,
-                           unsigned int size_MatC, 
-                           unsigned int size_MatF, 
+                           unsigned long long numberOfLBnodesC, 
+                           unsigned long long numberOfLBnodesF, 
                            bool isEvenTimestep,
                            unsigned int* posC, 
                            unsigned int* posFSWB, 
@@ -1329,8 +1339,8 @@ void ScaleCFEff27(real* DC,
                              unsigned int* neighborFX,
                              unsigned int* neighborFY,
                              unsigned int* neighborFZ,
-                             unsigned int size_MatC, 
-                             unsigned int size_MatF, 
+                             unsigned long long numberOfLBnodesC, 
+                             unsigned long long numberOfLBnodesF, 
                              bool isEvenTimestep,
                              unsigned int* posCSWB, 
                              unsigned int* posFSWB, 
@@ -1353,8 +1363,8 @@ void ScaleFCEff27(real* DC,
                              unsigned int* neighborFX,
                              unsigned int* neighborFY,
                              unsigned int* neighborFZ,
-                             unsigned int size_MatC, 
-                             unsigned int size_MatF, 
+                             unsigned long long numberOfLBnodesC, 
+                             unsigned long long numberOfLBnodesF, 
                              bool isEvenTimestep,
                              unsigned int* posC, 
                              unsigned int* posFSWB, 
@@ -1377,8 +1387,8 @@ void ScaleCFLast27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1401,8 +1411,8 @@ void ScaleFCLast27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -1425,8 +1435,8 @@ void ScaleCFpress27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1449,8 +1459,8 @@ void ScaleFCpress27(  real* DC,
                                  unsigned int* neighborFX,
                                  unsigned int* neighborFY,
                                  unsigned int* neighborFZ,
-                                 unsigned int size_MatC, 
-                                 unsigned int size_MatF, 
+                                 unsigned long long numberOfLBnodesC, 
+                                 unsigned long long numberOfLBnodesF, 
                                  bool isEvenTimestep,
                                  unsigned int* posC, 
                                  unsigned int* posFSWB, 
@@ -1473,8 +1483,8 @@ void ScaleCF_Fix_27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1497,8 +1507,8 @@ void ScaleCF_Fix_comp_27(   real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posCSWB, 
 									   unsigned int* posFSWB, 
@@ -1521,8 +1531,8 @@ void ScaleCF_0817_comp_27(  real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posCSWB, 
 									   unsigned int* posFSWB, 
@@ -1547,8 +1557,8 @@ void ScaleCF_comp_D3Q27F3_2018(	real* DC,
 											unsigned int* neighborFX,
 											unsigned int* neighborFY,
 											unsigned int* neighborFZ,
-											unsigned int size_MatC, 
-											unsigned int size_MatF, 
+											unsigned long long numberOfLBnodesC, 
+											unsigned long long numberOfLBnodesF, 
 											bool isEvenTimestep,
 											unsigned int* posCSWB, 
 											unsigned int* posFSWB, 
@@ -1572,8 +1582,8 @@ void ScaleCF_comp_D3Q27F3(real* DC,
 									 unsigned int* neighborFX,
 									 unsigned int* neighborFY,
 									 unsigned int* neighborFZ,
-									 unsigned int size_MatC, 
-									 unsigned int size_MatF, 
+									 unsigned long long numberOfLBnodesC, 
+									 unsigned long long numberOfLBnodesF, 
 									 bool isEvenTimestep,
 									 unsigned int* posCSWB, 
 									 unsigned int* posFSWB, 
@@ -1597,8 +1607,8 @@ void ScaleCF_staggered_time_comp_27( real* DC,
 												unsigned int* neighborFX,
 												unsigned int* neighborFY,
 												unsigned int* neighborFZ,
-												unsigned int size_MatC, 
-												unsigned int size_MatF, 
+												unsigned long long numberOfLBnodesC, 
+												unsigned long long numberOfLBnodesF, 
 												bool isEvenTimestep,
 												unsigned int* posCSWB, 
 												unsigned int* posFSWB, 
@@ -1624,8 +1634,8 @@ void ScaleCF_RhoSq_3rdMom_comp_27( real* DC,
 											  unsigned int* neighborFX,
 											  unsigned int* neighborFY,
 											  unsigned int* neighborFZ,
-											  unsigned int size_MatC, 
-											  unsigned int size_MatF, 
+											  unsigned long long numberOfLBnodesC, 
+											  unsigned long long numberOfLBnodesF, 
 											  bool isEvenTimestep,
 											  unsigned int* posCSWB, 
 											  unsigned int* posFSWB, 
@@ -1649,8 +1659,8 @@ void ScaleCF_AA2016_comp_27( real* DC,
 										unsigned int* neighborFX,
 										unsigned int* neighborFY,
 										unsigned int* neighborFZ,
-										unsigned int size_MatC, 
-										unsigned int size_MatF, 
+										unsigned long long numberOfLBnodesC, 
+										unsigned long long numberOfLBnodesF, 
 										bool isEvenTimestep,
 										unsigned int* posCSWB, 
 										unsigned int* posFSWB, 
@@ -1674,8 +1684,8 @@ void ScaleCF_NSPress_27(real* DC,
 								  unsigned int* neighborFX,
 								  unsigned int* neighborFY,
 								  unsigned int* neighborFZ,
-								  unsigned int size_MatC, 
-								  unsigned int size_MatF, 
+								  unsigned long long numberOfLBnodesC, 
+								  unsigned long long numberOfLBnodesF, 
 								  bool isEvenTimestep,
 								  unsigned int* posCSWB, 
 								  unsigned int* posFSWB, 
@@ -1698,8 +1708,8 @@ void ScaleFC_Fix_27(  real* DC,
                                  unsigned int* neighborFX,
                                  unsigned int* neighborFY,
                                  unsigned int* neighborFZ,
-                                 unsigned int size_MatC, 
-                                 unsigned int size_MatF, 
+                                 unsigned long long numberOfLBnodesC, 
+                                 unsigned long long numberOfLBnodesF, 
                                  bool isEvenTimestep,
                                  unsigned int* posC, 
                                  unsigned int* posFSWB, 
@@ -1722,8 +1732,8 @@ void ScaleFC_Fix_comp_27(   real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posC, 
 									   unsigned int* posFSWB, 
@@ -1746,8 +1756,8 @@ void ScaleFC_0817_comp_27(  real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posC, 
 									   unsigned int* posFSWB, 
@@ -1772,8 +1782,8 @@ void ScaleFC_comp_D3Q27F3_2018(real* DC,
 										  unsigned int* neighborFX,
 										  unsigned int* neighborFY,
 										  unsigned int* neighborFZ,
-										  unsigned int size_MatC, 
-										  unsigned int size_MatF, 
+										  unsigned long long numberOfLBnodesC, 
+										  unsigned long long numberOfLBnodesF, 
 										  bool isEvenTimestep,
 										  unsigned int* posC, 
 										  unsigned int* posFSWB, 
@@ -1797,8 +1807,8 @@ void ScaleFC_comp_D3Q27F3( real* DC,
 									  unsigned int* neighborFX,
 									  unsigned int* neighborFY,
 									  unsigned int* neighborFZ,
-									  unsigned int size_MatC, 
-									  unsigned int size_MatF, 
+									  unsigned long long numberOfLBnodesC, 
+									  unsigned long long numberOfLBnodesF, 
 									  bool isEvenTimestep,
 									  unsigned int* posC, 
 									  unsigned int* posFSWB, 
@@ -1822,8 +1832,8 @@ void ScaleFC_staggered_time_comp_27( real* DC,
 												unsigned int* neighborFX,
 												unsigned int* neighborFY,
 												unsigned int* neighborFZ,
-												unsigned int size_MatC, 
-												unsigned int size_MatF, 
+												unsigned long long numberOfLBnodesC, 
+												unsigned long long numberOfLBnodesF, 
 												bool isEvenTimestep,
 												unsigned int* posC, 
 												unsigned int* posFSWB, 
@@ -1849,8 +1859,8 @@ void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
 											  unsigned int* neighborFX,
 											  unsigned int* neighborFY,
 											  unsigned int* neighborFZ,
-											  unsigned int size_MatC, 
-											  unsigned int size_MatF, 
+											  unsigned long long numberOfLBnodesC, 
+											  unsigned long long numberOfLBnodesF, 
 											  bool isEvenTimestep,
 											  unsigned int* posC, 
 											  unsigned int* posFSWB, 
@@ -1874,8 +1884,8 @@ void ScaleFC_AA2016_comp_27( real* DC,
 										unsigned int* neighborFX,
 										unsigned int* neighborFY,
 										unsigned int* neighborFZ,
-										unsigned int size_MatC, 
-										unsigned int size_MatF, 
+										unsigned long long numberOfLBnodesC, 
+										unsigned long long numberOfLBnodesF, 
 										bool isEvenTimestep,
 										unsigned int* posC, 
 										unsigned int* posFSWB, 
@@ -1899,8 +1909,8 @@ void ScaleFC_NSPress_27(  real* DC,
 									 unsigned int* neighborFX,
 									 unsigned int* neighborFY,
 									 unsigned int* neighborFZ,
-									 unsigned int size_MatC, 
-									 unsigned int size_MatF, 
+									 unsigned long long numberOfLBnodesC, 
+									 unsigned long long numberOfLBnodesF, 
 									 bool isEvenTimestep,
 									 unsigned int* posC, 
 									 unsigned int* posFSWB, 
@@ -1925,8 +1935,8 @@ void ScaleCFThS7(  real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1945,8 +1955,8 @@ void ScaleFCThS7(  real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -1965,8 +1975,8 @@ void ScaleCFThSMG7(   real* DC,
                                  unsigned int* neighborFX,
                                  unsigned int* neighborFY,
                                  unsigned int* neighborFZ,
-                                 unsigned int size_MatC, 
-                                 unsigned int size_MatF, 
+                                 unsigned long long numberOfLBnodesC, 
+                                 unsigned long long numberOfLBnodesF, 
                                  bool isEvenTimestep,
                                  unsigned int* posCSWB, 
                                  unsigned int* posFSWB, 
@@ -1986,8 +1996,8 @@ void ScaleFCThSMG7(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -2007,8 +2017,8 @@ void ScaleCFThS27( real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -2028,8 +2038,8 @@ void ScaleFCThS27( real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -2049,7 +2059,7 @@ void DragLiftPostD27(real* DD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat, 
+								unsigned long long numberOfLBnodes, 
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
@@ -2063,7 +2073,7 @@ void DragLiftPreD27( real* DD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat, 
+								unsigned long long numberOfLBnodes, 
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
@@ -2074,7 +2084,7 @@ void CalcCPtop27(real* DD,
 							unsigned int* neighborX,
 							unsigned int* neighborY,
 							unsigned int* neighborZ,
-							unsigned int size_Mat, 
+							unsigned long long numberOfLBnodes, 
 							bool isEvenTimestep,
 							unsigned int numberOfThreads);
 
@@ -2085,7 +2095,7 @@ void CalcCPbottom27(real* DD,
 							   unsigned int* neighborX,
 							   unsigned int* neighborY,
 							   unsigned int* neighborZ,
-							   unsigned int size_Mat, 
+							   unsigned long long numberOfLBnodes, 
 							   bool isEvenTimestep,
 							   unsigned int numberOfThreads);
 
@@ -2096,7 +2106,7 @@ void GetSendFsPreDev27(real* DD,
 								  unsigned int* neighborX,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
-								  unsigned int size_Mat, 
+								  unsigned long long numberOfLBnodes, 
 								  bool isEvenTimestep,
 								  unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
@@ -2108,7 +2118,7 @@ void GetSendFsPostDev27(real* DD,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep,
 								   unsigned int numberOfThreads, 
 	                               cudaStream_t stream = CU_STREAM_LEGACY);
@@ -2120,7 +2130,7 @@ void SetRecvFsPreDev27(real* DD,
 								  unsigned int* neighborX,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
-								  unsigned int size_Mat, 
+								  unsigned long long numberOfLBnodes, 
 								  bool isEvenTimestep, unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
 
@@ -2131,7 +2141,7 @@ void SetRecvFsPostDev27(real* DD,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep,
 								   unsigned int numberOfThreads,
                                    cudaStream_t stream = CU_STREAM_LEGACY);
@@ -2144,7 +2154,7 @@ void getSendGsDevF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
@@ -2156,7 +2166,7 @@ void setRecvGsDevF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
@@ -2172,7 +2182,7 @@ void WallFuncDev27(unsigned int numberOfThreads,
 							  unsigned int* neighborX,
 							  unsigned int* neighborY,
 							  unsigned int* neighborZ,
-							  unsigned int size_Mat, 
+							  unsigned long long numberOfLBnodes, 
 							  bool isEvenTimestep);
 
 void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
@@ -2190,7 +2200,7 @@ void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  unsigned int* neighborX,
 										  unsigned int* neighborY,
 										  unsigned int* neighborZ,
-										  unsigned int size_Mat,
+										  unsigned long long numberOfLBnodes,
 										  real* DD,
 										  bool isEvenTimestep);
 
@@ -2204,7 +2214,7 @@ void GetVelotoForce27(unsigned int numberOfThreads,
 								 unsigned int* neighborX,
 								 unsigned int* neighborY,
 								 unsigned int* neighborZ,
-								 unsigned int size_Mat, 
+								 unsigned long long numberOfLBnodes, 
 								 bool isEvenTimestep);
 
 void InitParticlesDevice(real* coordX,
@@ -2229,7 +2239,7 @@ void InitParticlesDevice(real* coordX,
 									unsigned int* neighborWSB,
 									int level,
 									unsigned int numberOfParticles, 
-									unsigned int size_Mat,
+									unsigned long long numberOfLBnodes,
 									unsigned int numberOfThreads);
 
 void MoveParticlesDevice(real* coordX,
@@ -2257,16 +2267,16 @@ void MoveParticlesDevice(real* coordX,
 									unsigned int timestep, 
 									unsigned int numberOfTimesteps, 
 									unsigned int numberOfParticles, 
-									unsigned int size_Mat,
+									unsigned long long numberOfLBnodes,
 									unsigned int numberOfThreads,
 									bool isEvenTimestep);
 
 void initRandomDevice(curandState* state,
-								 unsigned int size_Mat,
+								 unsigned long long numberOfLBnodes,
 								 unsigned int numberOfThreads);
 
 void generateRandomValuesDevice(curandState* state,
-										   unsigned int size_Mat,
+										   unsigned long long numberOfLBnodes,
 										   real* randArray,
 										   unsigned int numberOfThreads);
 
@@ -2285,7 +2295,7 @@ void CalcTurbulenceIntensityDevice(
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
-   unsigned int size_Mat, 
+   unsigned long long numberOfLBnodes, 
    bool isEvenTimestep,
    uint numberOfThreads);
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index 94b9704b7ca57df4cd985f5aff9521b8a087b97f..3134db44346ee7f465a5c8f04505ee5749482fbf 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -22,7 +22,7 @@ __global__ void LB_Kernel_Casc27(real s9,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
                                             real* DDStart,
-                                            int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool EvenOrOdd);
 
 __global__ void LB_Kernel_Casc_SP_27(  real s9,
@@ -31,7 +31,7 @@ __global__ void LB_Kernel_Casc_SP_27(  real s9,
                                                   unsigned int* neighborY,
                                                   unsigned int* neighborZ,
                                                   real* DDStart,
-                                                  int size_Mat,
+                                                  unsigned long long numberOfLBnodes,
                                                   bool EvenOrOdd);
 
 __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
@@ -40,7 +40,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
                                                       unsigned int* neighborY,
                                                       unsigned int* neighborZ,
                                                       real* DDStart,
-                                                      int size_Mat,
+                                                      unsigned long long numberOfLBnodes,
                                                       bool EvenOrOdd);
 
 __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
@@ -49,134 +49,134 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
                                                          unsigned int* neighborY,
                                                          unsigned int* neighborZ,
                                                          real* DDStart,
-                                                         int size_Mat,
+                                                         unsigned long long numberOfLBnodes,
                                                          bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														int size_Mat,
-														int level,
-														real* forces,
-														bool EvenOrOdd);
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        unsigned long long numberOfLBnodes,
+                                                        int level,
+                                                        real* forces,
+                                                        bool EvenOrOdd);
 
 
 __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
-																unsigned int* bcMatD,
-																unsigned int* neighborX,
-																unsigned int* neighborY,
-																unsigned int* neighborZ,
-																real* DDStart,
-																int size_Mat,
-																int level,
-																real* forces,
-																bool EvenOrOdd);
+                                                                unsigned int* bcMatD,
+                                                                unsigned int* neighborX,
+                                                                unsigned int* neighborY,
+                                                                unsigned int* neighborZ,
+                                                                real* DDStart,
+                                                                unsigned long long numberOfLBnodes,
+                                                                int level,
+                                                                real* forces,
+                                                                bool EvenOrOdd);
 
 
 
 __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
-													real deltaPhi,
-													real angularVelocity,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* coordX,
-													real* coordY,
-													real* coordZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    real deltaPhi,
+                                                    real angularVelocity,
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* coordX,
+                                                    real* coordY,
+                                                    real* coordZ,
+                                                    real* DDStart,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Cascade_SP_27( real s9,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_New_SP_27( real s9,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														real* dxxUx,
-														real* dyyUy,
-														real* dzzUz,
-														int size_Mat,
-														bool EvenOrOdd);
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        real* dxxUx,
+                                                        real* dyyUy,
+                                                        real* dzzUz,
+                                                        unsigned long long numberOfLBnodes,
+                                                        bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool EvenOrOdd);
 
 __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void Cumulant_One_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 inline __device__ void forwardChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real K);
 
@@ -189,57 +189,57 @@ inline __device__ void backwardChimeraWithK(real &mfa, real &mfb, real &mfc, rea
 
 
 __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
-	real omega_in,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int* neighborWSB,
-	real* veloX,
-	real* veloY,
-	real* veloZ,
-	real* DDStart,
-	real* turbulentViscosity,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega_in,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    real* veloX,
+    real* veloY,
+    real* veloZ,
+    real* DDStart,
+    real* turbulentViscosity,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 
 __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
-															unsigned int* neighborX,
-															unsigned int* neighborY,
-															unsigned int* neighborZ,
-															real* DDStart,
-															int size_Mat,
-															int level,
-															real* forces,
-															real porosity,
-															real darcy,
-															real forchheimer,
-															unsigned int sizeOfPorousMedia,
-															unsigned int* nodeIdsPorousMedia,
-															bool EvenOrOdd);
+                                                            unsigned int* neighborX,
+                                                            unsigned int* neighborY,
+                                                            unsigned int* neighborZ,
+                                                            real* DDStart,
+                                                            unsigned long long numberOfLBnodes,
+                                                            int level,
+                                                            real* forces,
+                                                            real porosity,
+                                                            real darcy,
+                                                            real forchheimer,
+                                                            unsigned int sizeOfPorousMedia,
+                                                            unsigned int* nodeIdsPorousMedia,
+                                                            bool EvenOrOdd);
 
 __global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
-												  unsigned int* bcMatD,
-												  unsigned int* neighborX,
-												  unsigned int* neighborY,
-												  unsigned int* neighborZ,
-												  real* DDStart,
-												  real* DD7,
-												  int size_Mat,
-												  bool EvenOrOdd);
+                                                  unsigned int* bcMatD,
+                                                  unsigned int* neighborX,
+                                                  unsigned int* neighborY,
+                                                  unsigned int* neighborZ,
+                                                  real* DDStart,
+                                                  real* DD7,
+                                                  unsigned long long numberOfLBnodes,
+                                                  bool EvenOrOdd);
 
 __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
-												   unsigned int* bcMatD,
-												   unsigned int* neighborX,
-												   unsigned int* neighborY,
-												   unsigned int* neighborZ,
-												   real* DDStart,
-												   real* DD27,
-												   int size_Mat,
-												   bool EvenOrOdd);
+                                                   unsigned int* bcMatD,
+                                                   unsigned int* neighborX,
+                                                   unsigned int* neighborY,
+                                                   unsigned int* neighborZ,
+                                                   real* DDStart,
+                                                   real* DD27,
+                                                   unsigned long long numberOfLBnodes,
+                                                   bool EvenOrOdd);
 
 __global__ void LBInit27( int myid,
                                      int numprocs,
@@ -249,7 +249,7 @@ __global__ void LBInit27( int myid,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
                                      real* vParabel,
-                                     unsigned int size_Mat,
+                                     unsigned long long numberOfLBnodes,
                                      unsigned int grid_nx,
                                      unsigned int grid_ny,
                                      unsigned int grid_nz,
@@ -266,7 +266,7 @@ __global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
                                                real* ux,
                                                real* uy,
                                                real* uz,
-                                               unsigned int size_Mat,
+                                               unsigned long long numberOfLBnodes,
                                                real* DD,
                                                real omega,
                                                bool EvenOrOdd);
@@ -279,7 +279,7 @@ __global__ void InitAD7( unsigned int* neighborX,
                                        real* ux,
                                        real* uy,
                                        real* uz,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        real* DD7,
                                        bool EvenOrOdd);
 
@@ -291,26 +291,26 @@ __global__ void InitAD27(unsigned int* neighborX,
                                        real* ux,
                                        real* uy,
                                        real* uz,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        real* DD27,
                                        bool EvenOrOdd);
 
 __global__ void LB_PostProcessor_F3_2018_Fehlberg(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* rhoOut,
-	real* vxOut,
-	real* vyOut,
-	real* vzOut,
-	real* DDStart,
-	real* G6,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rhoOut,
+    real* vxOut,
+    real* vyOut,
+    real* vzOut,
+    real* DDStart,
+    real* G6,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void LBCalcMac27( real* vxD,
                                         real* vyD,
@@ -320,7 +320,7 @@ __global__ void LBCalcMac27( real* vxD,
                                         unsigned int* neighborY,
                                         unsigned int* neighborZ,
                                         unsigned int* geoD,
-                                        unsigned int size_Mat,
+                                        unsigned long long numberOfLBnodes,
                                         real* DD,
                                         bool isEvenTimestep);
 
@@ -333,60 +333,60 @@ __global__ void LBCalcMacSP27( real* vxD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD,
                                           bool isEvenTimestep);
 
 __global__ void LBCalcMacCompSP27( real* vxD,
-											  real* vyD,
-											  real* vzD,
-											  real* rhoD,
-											  real* pressD,
-											  unsigned int* geoD,
-											  unsigned int* neighborX,
-											  unsigned int* neighborY,
-											  unsigned int* neighborZ,
-											  unsigned int size_Mat,
-											  real* DD,
-											  bool isEvenTimestep);
+                                              real* vyD,
+                                              real* vzD,
+                                              real* rhoD,
+                                              real* pressD,
+                                              unsigned int* geoD,
+                                              unsigned int* neighborX,
+                                              unsigned int* neighborY,
+                                              unsigned int* neighborZ,
+                                              unsigned long long numberOfLBnodes,
+                                              real* DD,
+                                              bool isEvenTimestep);
 
 __global__ void CalcConc7( real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD7,
                                           bool isEvenTimestep);
 
 __global__ void GetPlaneConc7(real* Conc,
-								            int* kPC,
-								            unsigned int numberOfPointskPC,
-											unsigned int* geoD,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											real* DD7,
-											bool isEvenTimestep);
+                                            int* kPC,
+                                            unsigned int numberOfPointskPC,
+                                            unsigned int* geoD,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            real* DD7,
+                                            bool isEvenTimestep);
 
 __global__ void GetPlaneConc27(real* Conc,
-								             int* kPC,
-								             unsigned int numberOfPointskPC,
-											 unsigned int* geoD,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 real* DD27,
-											 bool isEvenTimestep);
+                                             int* kPC,
+                                             unsigned int numberOfPointskPC,
+                                             unsigned int* geoD,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned long long numberOfLBnodes,
+                                             real* DD27,
+                                             bool isEvenTimestep);
 
 __global__ void CalcConc27(real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD27,
                                           bool isEvenTimestep);
 
@@ -399,38 +399,38 @@ __global__ void LBCalcMedSP27( real* vxD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD,
                                           bool isEvenTimestep);
 
 __global__ void LBCalcMedCompSP27( real* vxD,
-											  real* vyD,
-											  real* vzD,
-											  real* rhoD,
-											  real* pressD,
-											  unsigned int* geoD,
-											  unsigned int* neighborX,
-											  unsigned int* neighborY,
-											  unsigned int* neighborZ,
-											  unsigned int size_Mat,
-											  real* DD,
-											  bool isEvenTimestep);
+                                              real* vyD,
+                                              real* vzD,
+                                              real* rhoD,
+                                              real* pressD,
+                                              unsigned int* geoD,
+                                              unsigned int* neighborX,
+                                              unsigned int* neighborY,
+                                              unsigned int* neighborZ,
+                                              unsigned long long numberOfLBnodes,
+                                              real* DD,
+                                              bool isEvenTimestep);
 
 __global__ void LBCalcMedCompAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int* geoD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	real* DD,
-	real* DD_AD,
-	bool isEvenTimestep);
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD,
+    real* DD_AD,
+    bool isEvenTimestep);
 
 __global__ void LBCalcMacMedSP27( real* vxD,
                                              real* vyD,
@@ -442,119 +442,119 @@ __global__ void LBCalcMacMedSP27( real* vxD,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
                                              unsigned int tdiff,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void LBResetMedianValuesSP27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void LBResetMedianValuesAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
-														real* kyzFromfcNEQ,
-														real* kxzFromfcNEQ,
-														real* kxxMyyFromfcNEQ,
-														real* kxxMzzFromfcNEQ,
-														unsigned int* geoD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int size_Mat,
-														real* DD,
-														bool isEvenTimestep);
+                                                        real* kyzFromfcNEQ,
+                                                        real* kxzFromfcNEQ,
+                                                        real* kxxMyyFromfcNEQ,
+                                                        real* kxxMzzFromfcNEQ,
+                                                        unsigned int* geoD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        unsigned long long numberOfLBnodes,
+                                                        real* DD,
+                                                        bool isEvenTimestep);
 
 __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
-													real* kyzFromfcNEQ,
-													real* kxzFromfcNEQ,
-													real* kxxMyyFromfcNEQ,
-													real* kxxMzzFromfcNEQ,
-													unsigned int* geoD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													real* DD,
-													bool isEvenTimestep);
+                                                    real* kyzFromfcNEQ,
+                                                    real* kxzFromfcNEQ,
+                                                    real* kxxMyyFromfcNEQ,
+                                                    real* kxxMzzFromfcNEQ,
+                                                    unsigned int* geoD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned long long numberOfLBnodes,
+                                                    real* DD,
+                                                    bool isEvenTimestep);
 
 __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
-														real* CUMabc,
-														real* CUMbac,
-														real* CUMbca,
-														real* CUMcba,
-														real* CUMacb,
-														real* CUMcab,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														int size_Mat,
-														bool EvenOrOdd);
+                                                        real* CUMabc,
+                                                        real* CUMbac,
+                                                        real* CUMbca,
+                                                        real* CUMcba,
+                                                        real* CUMacb,
+                                                        real* CUMcab,
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        unsigned long long numberOfLBnodes,
+                                                        bool EvenOrOdd);
 
 __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
-													real* CUMabc,
-													real* CUMbac,
-													real* CUMbca,
-													real* CUMcba,
-													real* CUMacb,
-													real* CUMcab,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    real* CUMabc,
+                                                    real* CUMbac,
+                                                    real* CUMbca,
+                                                    real* CUMcba,
+                                                    real* CUMacb,
+                                                    real* CUMcab,
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool EvenOrOdd);
 
 __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
-															real* CUMbcb,
-															real* CUMbbc,
-															real* CUMcca,
-															real* CUMcac,
-															real* CUMacc,
-															real* CUMbcc,
-															real* CUMcbc,
-															real* CUMccb,
-															real* CUMccc,
-															unsigned int* bcMatD,
-															unsigned int* neighborX,
-															unsigned int* neighborY,
-															unsigned int* neighborZ,
-															real* DDStart,
-															int size_Mat,
-															bool EvenOrOdd);
+                                                            real* CUMbcb,
+                                                            real* CUMbbc,
+                                                            real* CUMcca,
+                                                            real* CUMcac,
+                                                            real* CUMacc,
+                                                            real* CUMbcc,
+                                                            real* CUMcbc,
+                                                            real* CUMccb,
+                                                            real* CUMccc,
+                                                            unsigned int* bcMatD,
+                                                            unsigned int* neighborX,
+                                                            unsigned int* neighborY,
+                                                            unsigned int* neighborZ,
+                                                            real* DDStart,
+                                                            unsigned long long numberOfLBnodes,
+                                                            bool EvenOrOdd);
 
 __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
-														real* CUMbcb,
-														real* CUMbbc,
-														real* CUMcca,
-														real* CUMcac,
-														real* CUMacc,
-														real* CUMbcc,
-														real* CUMcbc,
-														real* CUMccb,
-														real* CUMccc,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														int size_Mat,
-														bool EvenOrOdd);
+                                                        real* CUMbcb,
+                                                        real* CUMbbc,
+                                                        real* CUMcca,
+                                                        real* CUMcac,
+                                                        real* CUMacc,
+                                                        real* CUMbcc,
+                                                        real* CUMcbc,
+                                                        real* CUMccb,
+                                                        real* CUMccc,
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        unsigned long long numberOfLBnodes,
+                                                        bool EvenOrOdd);
 
 __global__ void LBCalcMeasurePoints(real* vxMP,
                                                real* vyMP,
@@ -568,7 +568,7 @@ __global__ void LBCalcMeasurePoints(real* vxMP,
                                                unsigned int* neighborX,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
-                                               unsigned int size_Mat,
+                                               unsigned long long numberOfLBnodes,
                                                real* DD,
                                                bool isEvenTimestep);
 
@@ -580,7 +580,7 @@ __global__ void LB_BC_Press_East27( int nx,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
                                                real* DD,
-                                               unsigned int size_Mat,
+                                               unsigned long long numberOfLBnodes,
                                                bool isEvenTimestep) ;
 
 __global__ void LB_BC_Vel_West_27( int nx,
@@ -592,7 +592,7 @@ __global__ void LB_BC_Vel_West_27( int nx,
                                               unsigned int* neighborY,
                                               unsigned int* neighborZ,
                                               real* DD,
-                                              unsigned int size_Mat,
+                                              unsigned long long numberOfLBnodes,
                                               bool isEvenTimestep,
                                               real u0x,
                                               unsigned int grid_nx,
@@ -608,64 +608,64 @@ __global__ void QDevice27(real* distributions,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes,
+                                     unsigned long long numberOfLBnodes,
                                      bool isEvenTimestep);
 
 __global__ void QDeviceComp27(
-										 real* distributions,
-										 int* subgridDistanceIndices,
-										 real* subgridDistances,
-										 unsigned int numberOfBCnodes,
-										 real omega,
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int numberOfLBnodes,
-										 bool isEvenTimestep);
+                                         real* distributions,
+                                         int* subgridDistanceIndices,
+                                         real* subgridDistances,
+                                         unsigned int numberOfBCnodes,
+                                         real omega,
+                                         unsigned int* neighborX,
+                                         unsigned int* neighborY,
+                                         unsigned int* neighborZ,
+                                         unsigned long long numberOfLBnodes,
+                                         bool isEvenTimestep);
 
 __global__ void QDeviceCompThinWallsPartOne27(real* DD,
-														 int* k_Q,
-														 real* QQ,
-														 unsigned int numberOfBCnodes,
-														 real om1,
-														 unsigned int* neighborX,
-														 unsigned int* neighborY,
-														 unsigned int* neighborZ,
-														 unsigned int size_Mat,
-														 bool isEvenTimestep);
-
-__global__ void QDevice3rdMomentsComp27(	 real* distributions, 
-													 int* subgridDistanceIndices, 
-													 real* subgridDistances,
-													 unsigned int numberOfBCnodes, 
-													 real omega, 
-													 unsigned int* neighborX,
-													 unsigned int* neighborY,
-													 unsigned int* neighborZ,
-													 unsigned int numberOfLBnodes, 
-													 bool isEvenTimestep);
+                                                         int* k_Q,
+                                                         real* QQ,
+                                                         unsigned int numberOfBCnodes,
+                                                         real om1,
+                                                         unsigned int* neighborX,
+                                                         unsigned int* neighborY,
+                                                         unsigned int* neighborZ,
+                                                         unsigned long long numberOfLBnodes,
+                                                         bool isEvenTimestep);
+
+__global__ void QDevice3rdMomentsComp27(	 real* distributions,
+                                                     int* subgridDistanceIndices,
+                                                     real* subgridDistances,
+                                                     unsigned int numberOfBCnodes,
+                                                     real omega,
+                                                     unsigned int* neighborX,
+                                                     unsigned int* neighborY,
+                                                     unsigned int* neighborZ,
+                                                     unsigned long long numberOfLBnodes,
+                                                     bool isEvenTimestep);
 
 __global__ void QDeviceIncompHighNu27(real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int numberOfLBnodes,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned long long numberOfLBnodes,
+                                                 bool isEvenTimestep);
 
 __global__ void QDeviceCompHighNu27(	 real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned long long numberOfLBnodes,
+                                                 bool isEvenTimestep);
 
 //Velocity BCs
 __global__ void QVelDevPlainBB27(
@@ -679,43 +679,43 @@ __global__ void QVelDevPlainBB27(
     uint* neighborX,
     uint* neighborY,
     uint* neighborZ,
-    uint numberOfLBnodes,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void QVelDevCouette27(real* vx,
-											real* vy,
-											real* vz,
-											real* DD,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* vy,
+                                            real* vz,
+                                            real* DD,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void QVelDev1h27( int inx,
-										int iny,
-										real* vx,
-										real* vy,
-										real* vz,
-										real* DD,
-										int* k_Q,
-										real* QQ,
-										unsigned int numberOfBCnodes,
-										real om1,
-										real Phi,
-										real angularVelocity,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										real* coordX,
-										real* coordY,
-										real* coordZ,
-										unsigned int size_Mat,
-										bool isEvenTimestep);
+                                        int iny,
+                                        real* vx,
+                                        real* vy,
+                                        real* vz,
+                                        real* DD,
+                                        int* k_Q,
+                                        real* QQ,
+                                        unsigned int numberOfBCnodes,
+                                        real om1,
+                                        real Phi,
+                                        real angularVelocity,
+                                        unsigned int* neighborX,
+                                        unsigned int* neighborY,
+                                        unsigned int* neighborZ,
+                                        real* coordX,
+                                        real* coordY,
+                                        real* coordZ,
+                                        unsigned long long numberOfLBnodes,
+                                        bool isEvenTimestep);
 
 __global__ void QVelDevice27(int inx,
                                         int iny,
@@ -730,111 +730,111 @@ __global__ void QVelDevice27(int inx,
                                         unsigned int* neighborX,
                                         unsigned int* neighborY,
                                         unsigned int* neighborZ,
-                                        unsigned int size_Mat,
+                                        unsigned long long numberOfLBnodes,
                                         bool isEvenTimestep);
 
 __global__ void QVelDeviceCompPlusSlip27(real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* vy,
+                                                    real* vz,
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    real* QQ,
+                                                    unsigned int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool isEvenTimestep);
 
 __global__ void QVelDeviceComp27(real* velocityX,
-											real* velocityY,
-											real* velocityZ,
-											real* distribution,
-											int* subgridDistanceIndices,
-											real* subgridDistances,
-											unsigned int numberOfBCnodes,
-											real omega,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int numberOfLBnodes,
-											bool isEvenTimestep);
+                                            real* velocityY,
+                                            real* velocityZ,
+                                            real* distribution,
+                                            int* subgridDistanceIndices,
+                                            real* subgridDistances,
+                                            unsigned int numberOfBCnodes,
+                                            real omega,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void QVelDeviceCompThinWallsPartOne27(
-	real* vx,
-	real* vy,
-	real* vz,
-	real* DD,
-	int* k_Q,
-	real* QQ,
-	uint numberOfBCnodes,
-	real om1,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint size_Mat,
-	bool isEvenTimestep);
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    uint numberOfBCnodes,
+    real om1,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void QThinWallsPartTwo27(
-	real* DD,
-	int* k_Q,
-	real* QQ,
-	uint numberOfBCnodes,
-	uint* geom,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint* neighborWSB,
-	uint size_Mat,
-	bool isEvenTimestep);
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    uint numberOfBCnodes,
+    uint* geom,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighborWSB,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void QVelDeviceCompZeroPress27(
-	real* velocityX,
-	real* velocityY,
-	real* velocityZ,
-	real* distribution,
-	int* subgridDistanceIndices,
-	real* subgridDistances,
-	unsigned int numberOfBCnodes,
-	real omega,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int numberOfLBnodes,
-	bool isEvenTimestep);
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distribution,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void QVelDeviceIncompHighNu27(real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* vy,
+                                                    real* vz,
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    real* QQ,
+                                                    unsigned int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool isEvenTimestep);
 
 __global__ void QVelDeviceCompHighNu27(	real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* vy,
+                                                    real* vz,
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    real* QQ,
+                                                    unsigned int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool isEvenTimestep);
 
 __global__ void QVeloDeviceEQ27(real* VeloX,
-										   real* VeloY,
-										   real* VeloZ,
+                                           real* VeloY,
+                                           real* VeloZ,
                                            real* DD,
                                            int* k_Q,
                                            int numberOfBCnodes,
@@ -842,22 +842,22 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void QVeloStreetDeviceEQ27(
-	real* veloXfraction,
-	real* veloYfraction,
-	int*  naschVelo,
-	real* DD,
-	int*  naschIndex,
-	int   numberOfStreetNodes,
-	real  velocityRatio,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint  size_Mat,
-	bool  isEvenTimestep);
+    real* veloXfraction,
+    real* veloYfraction,
+    int*  naschVelo,
+    real* DD,
+    int*  naschIndex,
+    int   numberOfStreetNodes,
+    real  velocityRatio,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool  isEvenTimestep);
 
 //Slip BCs
 __global__ void QSlipDevice27(real* DD,
@@ -868,139 +868,150 @@ __global__ void QSlipDevice27(real* DD,
                                          unsigned int* neighborX,
                                          unsigned int* neighborY,
                                          unsigned int* neighborZ,
-                                         unsigned int size_Mat,
+                                         unsigned long long numberOfLBnodes,
                                          bool isEvenTimestep);
 
 __global__ void QSlipDeviceComp27(real* DD,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned long long numberOfLBnodes,
+                                             bool isEvenTimestep);
 
 __global__ void QSlipDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
+                                    real* distributions,
+                                    int* subgridDistanceIndices,
                                     real* subgridDistances,
                                     unsigned int numberOfBCnodes,
-                                    real omega, 
+                                    real omega,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QSlipPressureDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
+                                    real* distributions,
+                                    int* subgridDistanceIndices,
                                     real* subgridDistances,
                                     unsigned int numberOfBCnodes,
-                                    real omega, 
+                                    real omega,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QSlipGeomDeviceComp27(real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 real* NormalX,
+                                                 real* NormalY,
+                                                 real* NormalZ,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned long long numberOfLBnodes,
+                                                 bool isEvenTimestep);
 
 __global__ void QSlipNormDeviceComp27(real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 real* NormalX,
+                                                 real* NormalY,
+                                                 real* NormalZ,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned long long numberOfLBnodes,
+                                                 bool isEvenTimestep);
+
+__global__ void BBSlipDeviceComp27(
+    real* distributions,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 // Stress BCs (wall model)
 __global__ void QStressDeviceComp27(real* DD,
-											   int* k_Q,
-											 int* k_N,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 real* turbViscosity,
-										     real* vx,
-											 real* vy,
-                                    	     real* vz,
-											 real* normalX,
-											 real* normalY,
-                                    	     real* normalZ,
-											 real* vx_bc,
-											 real* vy_bc,
-                                    	     real* vz_bc,
-											 real* vx1,
-                                    		 real* vy1,
-                                    		 real* vz1,
-											 int* samplingOffset,
-											 real* z0,
-											 bool  hasWallModelMonitor,
-											real* u_star_monitor,
-											real* Fx_monitor,
-											real* Fy_monitor,
-											real* Fz_monitor,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                               int* k_Q,
+                                             int* k_N,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             real* turbViscosity,
+                                             real* vx,
+                                             real* vy,
+                                             real* vz,
+                                             real* normalX,
+                                             real* normalY,
+                                             real* normalZ,
+                                             real* vx_bc,
+                                             real* vy_bc,
+                                             real* vz_bc,
+                                             real* vx1,
+                                             real* vy1,
+                                             real* vz1,
+                                             int* samplingOffset,
+                                             real* z0,
+                                             bool  hasWallModelMonitor,
+                                            real* u_star_monitor,
+                                            real* Fx_monitor,
+                                            real* Fy_monitor,
+                                            real* Fz_monitor,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned long long numberOfLBnodes,
+                                             bool isEvenTimestep);
 
 __global__ void BBStressDevice27( real* DD,
-												int* k_Q,
-												int* k_N,
-												real* QQ,
-												unsigned int numberOfBCnodes,
-												real* vx,
-												real* vy,
-												real* vz,
-												real* normalX,
-												real* normalY,
-												real* normalZ,
-												real* vx_bc,
-												real* vy_bc,
-												real* vz_bc,
-												real* vx1,
-												real* vy1,
-												real* vz1,
-												int* samplingOffset,
-												real* z0,
-												bool  hasWallModelMonitor,
-												real* u_star_monitor,
-												real* Fx_monitor,
-												real* Fy_monitor,
-												real* Fz_monitor,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                int* k_Q,
+                                                int* k_N,
+                                                real* QQ,
+                                                unsigned int numberOfBCnodes,
+                                                real* vx,
+                                                real* vy,
+                                                real* vz,
+                                                real* normalX,
+                                                real* normalY,
+                                                real* normalZ,
+                                                real* vx_bc,
+                                                real* vy_bc,
+                                                real* vz_bc,
+                                                real* vx1,
+                                                real* vy1,
+                                                real* vz1,
+                                                int* samplingOffset,
+                                                real* z0,
+                                                bool  hasWallModelMonitor,
+                                                real* u_star_monitor,
+                                                real* Fx_monitor,
+                                                real* Fy_monitor,
+                                                real* Fz_monitor,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
 
 __global__ void BBStressPressureDevice27( real* DD,
-											            int* k_Q,
+                                                        int* k_Q,
                                              int* k_N,
                                              real* QQ,
                                              unsigned int  numberOfBCnodes,
@@ -1026,7 +1037,7 @@ __global__ void BBStressPressureDevice27( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 //Pressure BCs
@@ -1039,23 +1050,23 @@ __global__ void QPressDevice27( real* rhoBC,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void QPressDeviceAntiBB27(   real* rhoBC,
-												   real* vx,
-												   real* vy,
-												   real* vz,
-												   real* DD,
-												   int* k_Q,
-												   real* QQ,
-												   int numberOfBCnodes,
-												   real om1,
-												   unsigned int* neighborX,
-												   unsigned int* neighborY,
-												   unsigned int* neighborZ,
-												   unsigned int size_Mat,
-												   bool isEvenTimestep);
+                                                   real* vx,
+                                                   real* vy,
+                                                   real* vz,
+                                                   real* DD,
+                                                   int* k_Q,
+                                                   real* QQ,
+                                                   int numberOfBCnodes,
+                                                   real om1,
+                                                   unsigned int* neighborX,
+                                                   unsigned int* neighborY,
+                                                   unsigned int* neighborZ,
+                                                   unsigned long long numberOfLBnodes,
+                                                   bool isEvenTimestep);
 
 __global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       real* DD,
@@ -1065,7 +1076,7 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       unsigned int* neighborX,
                                                       unsigned int* neighborY,
                                                       unsigned int* neighborZ,
-                                                      unsigned int size_Mat,
+                                                      unsigned long long numberOfLBnodes,
                                                       bool isEvenTimestep);
 
 __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
@@ -1076,32 +1087,47 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      unsigned int* neighborX,
                                                      unsigned int* neighborY,
                                                      unsigned int* neighborZ,
-                                                     unsigned int size_Mat,
+                                                     unsigned long long numberOfLBnodes,
                                                      bool isEvenTimestep);
 
 __global__ void QPressNoRhoDevice27(  real* rhoBC,
-												 real* DD,
-												 int* k_Q,
-												 int* k_N,
-												 int numberOfBCnodes,
-												 real om1,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+                                                 real* distributions,
+                                                 int* k_Q,
+                                                 int* k_N,
+                                                 int numberOfBCnodes,
+                                                 real om1,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned long long numberOfLBnodes,
+                                                 bool isEvenTimestep,
+                                                 int direction);
+
+__global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
+                                            real* distributions,
+                                            int* k_Q,
+                                            int* k_N,
+                                            int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep,
+                                            int direction,
+                                            real densityCorrectionFactor);
 
 __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
-														 real* DD,
-														 int* k_Q,
-														 int* k_N,
-														 int numberOfBCnodes,
-														 real om1,
-														 unsigned int* neighborX,
-														 unsigned int* neighborY,
-														 unsigned int* neighborZ,
-														 unsigned int size_Mat,
-														 bool isEvenTimestep);
+                                                         real* DD,
+                                                         int* k_Q,
+                                                         int* k_N,
+                                                         int numberOfBCnodes,
+                                                         real om1,
+                                                         unsigned int* neighborX,
+                                                         unsigned int* neighborY,
+                                                         unsigned int* neighborZ,
+                                                         unsigned long long numberOfLBnodes,
+                                                         bool isEvenTimestep);
 
 __global__ void QPressDeviceOld27(real* rhoBC,
                                              real* DD,
@@ -1112,20 +1138,20 @@ __global__ void QPressDeviceOld27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
-													real* DD,
-													int* k_Q,
-													int* k_N,
-													int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    int* k_N,
+                                                    int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned long long numberOfLBnodes,
+                                                    bool isEvenTimestep);
 
 __global__ void QPressDeviceNEQ27(real* rhoBC,
                                              real* distribution,
@@ -1136,7 +1162,7 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceEQZ27(real* rhoBC,
@@ -1149,17 +1175,17 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceZero27(  real* DD,
-												int* k_Q,
-												unsigned int numberOfBCnodes,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                int* k_Q,
+                                                unsigned int numberOfBCnodes,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
 
 __global__ void QPressDeviceFake27(real* rhoBC,
                                              real* DD,
@@ -1170,7 +1196,7 @@ __global__ void QPressDeviceFake27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void BBDevice27(real* distributions,
@@ -1180,20 +1206,20 @@ __global__ void BBDevice27(real* distributions,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes,
+                                     unsigned long long numberOfLBnodes,
                                      bool isEvenTimestep);
 
 __global__ void QPressDevice27_IntBB(real* rho,
-												real* DD,
-												int* k_Q,
-												real* QQ,
-												unsigned int numberOfBCnodes,
-												real om1,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                real* DD,
+                                                int* k_Q,
+                                                real* QQ,
+                                                unsigned int numberOfBCnodes,
+                                                real om1,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //Schlaffer BCs
@@ -1210,7 +1236,7 @@ __global__ void PressSchlaff27(real* rhoBC,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
@@ -1225,9 +1251,106 @@ __global__ void VelSchlaff27(  int t,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
+__global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
+                                                int numberOfBCnodes,
+                                                int numberOfPrecursorNodes,
+                                                int sizeQ,
+                                                real omega,
+                                                real* distributions,
+                                                real* subgridDistances,
+                                                uint* neighborX,
+                                                uint* neighborY,
+                                                uint* neighborZ,
+                                                uint* neighborsNT,
+                                                uint* neighborsNB,
+                                                uint* neighborsST,
+                                                uint* neighborsSB,
+                                                real* weights0PP,
+                                                real* weights0PM,
+                                                real* weights0MP,
+                                                real* weights0MM,
+                                                real* vLast,
+                                                real* vCurrent,
+                                                real velocityX,
+                                                real velocityY,
+                                                real velocityZ,
+                                                real timeRatio,
+                                                real velocityRatio,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
+
+__global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
+                                        int numberOfBCnodes,
+                                        int numberOfPrecursorNodes,
+                                        real omega,
+                                        real* distributions,
+                                        uint* neighborX,
+                                        uint* neighborY,
+                                        uint* neighborZ,
+                                        uint* neighborsNT,
+                                        uint* neighborsNB,
+                                        uint* neighborsST,
+                                        uint* neighborsSB,
+                                        real* weights0PP,
+                                        real* weights0PM,
+                                        real* weights0MP,
+                                        real* weights0MM,
+                                        real* vLast,
+                                        real* vCurrent,
+                                        real velocityX,
+                                        real velocityY,
+                                        real velocityZ,
+                                        real timeRatio,
+                                        real velocityRatio,
+                                        unsigned long long numberOfLBnodes,
+                                        bool isEvenTimestep);
+
+__global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
+                                                int numberOfBCNodes,
+                                                int numberOfPrecursorNodes,
+                                                real* distributions,
+                                                uint* neighborX,
+                                                uint* neighborY,
+                                                uint* neighborZ,
+                                                uint* neighborsNT,
+                                                uint* neighborsNB,
+                                                uint* neighborsST,
+                                                uint* neighborsSB,
+                                                real* weights0PP,
+                                                real* weights0PM,
+                                                real* weights0MP,
+                                                real* weights0MM,
+                                                real* fsLast,
+                                                real* fsNext,
+                                                real timeRatio,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
+__global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
+                                                real* subgridDistances,
+                                                int sizeQ,
+                                                int numberOfBCNodes,
+                                                int numberOfPrecursorNodes,
+                                                real* distributions,
+                                                uint* neighborX,
+                                                uint* neighborY,
+                                                uint* neighborZ,
+                                                uint* neighborsNT,
+                                                uint* neighborsNB,
+                                                uint* neighborsST,
+                                                uint* neighborsSB,
+                                                real* weights0PP,
+                                                real* weights0PM,
+                                                real* weights0MP,
+                                                real* weights0MM,
+                                                real* fsLast,
+                                                real* fsNext,
+                                                real timeRatio,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
+
 //Advection / Diffusion BCs
 __global__ void QAD7( real* DD,
                                  real* DD7,
@@ -1240,68 +1363,68 @@ __global__ void QAD7( real* DD,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat,
+                                 unsigned long long numberOfLBnodes,
                                  bool isEvenTimestep);
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref Advection_Diffusion_Device_Kernel : Factorized central moments for Advection Diffusion Equation
 __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
-	real omegaDiffusivity,
-	uint* typeOfGridNode,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
-	real* distributionsAD,
-	int size_Mat,
-	real* forces,
-	bool isEvenTimestep);
+    real omegaDiffusivity,
+    uint* typeOfGridNode,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    real* distributions,
+    real* distributionsAD,
+    unsigned long long numberOfLBnodes,
+    real* forces,
+    bool isEvenTimestep);
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref AD_SlipVelDeviceComp : device function for the slip-AD boundary condition
 __global__ void AD_SlipVelDeviceComp(
-	real * normalX,
-	real * normalY,
-	real * normalZ,
-	real * distributions,
-	real * distributionsAD,
-	int* QindexArray,
-	real * Qarrays,
-	uint numberOfBCnodes,
-	real omegaDiffusivity,
-	uint * neighborX,
-	uint * neighborY,
-	uint * neighborZ,
-	uint size_Mat,
-	bool isEvenTimestep);
+    real * normalX,
+    real * normalY,
+    real * normalZ,
+    real * distributions,
+    real * distributionsAD,
+    int* QindexArray,
+    real * Qarrays,
+    uint numberOfBCnodes,
+    real omegaDiffusivity,
+    uint * neighborX,
+    uint * neighborY,
+    uint * neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void QADDirichlet27(   real* DD,
-											 real* DD27,
-											 real* temp,
-											 real diffusivity,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             real* DD27,
+                                             real* temp,
+                                             real diffusivity,
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned long long numberOfLBnodes,
+                                             bool isEvenTimestep);
 
 __global__ void QADBB27(  real* DD,
-									 real* DD27,
-									 real* temp,
-									 real diffusivity,
-									 int* k_Q,
-									 real* QQ,
-									 unsigned int numberOfBCnodes,
-									 real om1,
-									 unsigned int* neighborX,
-									 unsigned int* neighborY,
-									 unsigned int* neighborZ,
-									 unsigned int size_Mat,
-									 bool isEvenTimestep);
+                                     real* DD27,
+                                     real* temp,
+                                     real diffusivity,
+                                     int* k_Q,
+                                     real* QQ,
+                                     unsigned int numberOfBCnodes,
+                                     real om1,
+                                     unsigned int* neighborX,
+                                     unsigned int* neighborY,
+                                     unsigned int* neighborZ,
+                                     unsigned long long numberOfLBnodes,
+                                     bool isEvenTimestep);
 
 __global__ void QADVel7( real* DD,
                                     real* DD7,
@@ -1315,7 +1438,7 @@ __global__ void QADVel7( real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat,
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QADVel27(real* DD,
@@ -1330,7 +1453,7 @@ __global__ void QADVel27(real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat,
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QADPress7(  real* DD,
@@ -1345,7 +1468,7 @@ __global__ void QADPress7(  real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        bool isEvenTimestep);
 
 __global__ void QADPress27( real* DD,
@@ -1360,109 +1483,109 @@ __global__ void QADPress27( real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        bool isEvenTimestep);
 
 __global__ void QADPressNEQNeighbor27(
-												 real* DD,
-												 real* DD27,
-												 int* k_Q,
-												 int* k_N,
-												 int numberOfBCnodes,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep
-												);
+                                                 real* DD,
+                                                 real* DD27,
+                                                 int* k_Q,
+                                                 int* k_N,
+                                                 int numberOfBCnodes,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned long long numberOfLBnodes,
+                                                 bool isEvenTimestep
+                                                );
 
 __global__ void QNoSlipADincomp7( real* DD,
-											 real* DD7,
-											 real* temp,
-											 real diffusivity,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             real* DD7,
+                                             real* temp,
+                                             real diffusivity,
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned long long numberOfLBnodes,
+                                             bool isEvenTimestep);
 
 __global__ void QNoSlipADincomp27( real* DD,
-											 real* DD27,
-											 real* temp,
-											 real diffusivity,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             real* DD27,
+                                             real* temp,
+                                             real diffusivity,
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned long long numberOfLBnodes,
+                                             bool isEvenTimestep);
 
 __global__ void QADVeloIncomp7(  real* DD,
-											real* DD7,
-											real* temp,
-											real* velo,
-											real diffusivity,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* DD7,
+                                            real* temp,
+                                            real* velo,
+                                            real diffusivity,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void QADVeloIncomp27( real* DD,
-											real* DD27,
-											real* temp,
-											real* velo,
-											real diffusivity,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* DD27,
+                                            real* temp,
+                                            real* velo,
+                                            real diffusivity,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void QADPressIncomp7(real* DD,
-										   real* DD7,
-										   real* temp,
-										   real* velo,
-										   real diffusivity,
-										   int* k_Q,
-										   real* QQ,
-										   unsigned int numberOfBCnodes,
-										   real om1,
-										   unsigned int* neighborX,
-										   unsigned int* neighborY,
-										   unsigned int* neighborZ,
-										   unsigned int size_Mat,
-										   bool isEvenTimestep);
+                                           real* DD7,
+                                           real* temp,
+                                           real* velo,
+                                           real diffusivity,
+                                           int* k_Q,
+                                           real* QQ,
+                                           unsigned int numberOfBCnodes,
+                                           real om1,
+                                           unsigned int* neighborX,
+                                           unsigned int* neighborY,
+                                           unsigned int* neighborZ,
+                                           unsigned long long numberOfLBnodes,
+                                           bool isEvenTimestep);
 
 __global__ void QADPressIncomp27(   real* DD,
-											   real* DD27,
-											   real* temp,
-											   real* velo,
-											   real diffusivity,
-											   int* k_Q,
-											   real* QQ,
-											   unsigned int numberOfBCnodes,
-											   real om1,
-											   unsigned int* neighborX,
-											   unsigned int* neighborY,
-											   unsigned int* neighborZ,
-											   unsigned int size_Mat,
-											   bool isEvenTimestep);
+                                               real* DD27,
+                                               real* temp,
+                                               real* velo,
+                                               real diffusivity,
+                                               int* k_Q,
+                                               real* QQ,
+                                               unsigned int numberOfBCnodes,
+                                               real om1,
+                                               unsigned int* neighborX,
+                                               unsigned int* neighborY,
+                                               unsigned int* neighborZ,
+                                               unsigned long long numberOfLBnodes,
+                                               bool isEvenTimestep);
 
 //Propeller BC
 __global__ void PropellerBC(unsigned int* neighborX,
@@ -1473,8 +1596,8 @@ __global__ void PropellerBC(unsigned int* neighborX,
                                        real* uy,
                                        real* uz,
                                        int* k_Q,
-									   unsigned int size_Prop,
-                                       unsigned int size_Mat,
+                                       unsigned int size_Prop,
+                                       unsigned long long numberOfLBnodes,
                                        unsigned int* bcMatD,
                                        real* DD,
                                        bool EvenOrOdd);
@@ -1490,19 +1613,19 @@ __global__ void scaleCF27(real* DC,
                                     unsigned int* neighborFX,
                                     unsigned int* neighborFY,
                                     unsigned int* neighborFZ,
-										       unsigned int size_MatC,
-										       unsigned int size_MatF,
-										       bool isEvenTimestep,
+                                               unsigned long long numberOfLBnodesC,
+                                               unsigned long long numberOfLBnodesF,
+                                               bool isEvenTimestep,
                                      unsigned int* posCSWB,
                                      unsigned int* posFSWB,
                                      unsigned int kCF,
-										       real omCoarse,
-										       real omFine,
-										       real nu,
-										       unsigned int nxC,
-										       unsigned int nyC,
-										       unsigned int nxF,
-										       unsigned int nyF);
+                                               real omCoarse,
+                                               real omFine,
+                                               real nu,
+                                               unsigned int nxC,
+                                               unsigned int nyC,
+                                               unsigned int nxF,
+                                               unsigned int nyF);
 
 __global__ void scaleCFEff27(real* DC,
                                         real* DF,
@@ -1512,18 +1635,18 @@ __global__ void scaleCFEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-									             unsigned int size_MatC,
-									             unsigned int size_MatF,
-									             bool isEvenTimestep,
+                                                 unsigned long long numberOfLBnodesC,
+                                                 unsigned long long numberOfLBnodesF,
+                                                 bool isEvenTimestep,
                                         unsigned int* posCSWB,
                                         unsigned int* posFSWB,
                                         unsigned int kCF,
-									             real omCoarse,
-									             real omFine,
-									             real nu,
-									             unsigned int nxC,
-									             unsigned int nyC,
-									             unsigned int nxF,
+                                                 real omCoarse,
+                                                 real omFine,
+                                                 real nu,
+                                                 unsigned int nxC,
+                                                 unsigned int nyC,
+                                                 unsigned int nxF,
                                         unsigned int nyF,
                                         OffCF offCF);
 
@@ -1535,8 +1658,8 @@ __global__ void scaleCFLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
@@ -1558,8 +1681,8 @@ __global__ void scaleCFpress27(real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
@@ -1581,8 +1704,8 @@ __global__ void scaleCF_Fix_27(real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
@@ -1597,233 +1720,233 @@ __global__ void scaleCF_Fix_27(real* DC,
                                           OffCF offCF);
 
 __global__ void scaleCF_Fix_comp_27(   real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_0817_comp_27(  real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
-													  real* DF,
-													  real* G6,
-													  unsigned int* neighborCX,
-													  unsigned int* neighborCY,
-													  unsigned int* neighborCZ,
-													  unsigned int* neighborFX,
-													  unsigned int* neighborFY,
-													  unsigned int* neighborFZ,
-													  unsigned int size_MatC,
-													  unsigned int size_MatF,
-													  bool isEvenTimestep,
-													  unsigned int* posCSWB,
-													  unsigned int* posFSWB,
-													  unsigned int kCF,
-													  real omCoarse,
-													  real omFine,
-													  real nu,
-													  unsigned int nxC,
-													  unsigned int nyC,
-													  unsigned int nxF,
-													  unsigned int nyF,
-													  OffCF offCF);
+                                                      real* DF,
+                                                      real* G6,
+                                                      unsigned int* neighborCX,
+                                                      unsigned int* neighborCY,
+                                                      unsigned int* neighborCZ,
+                                                      unsigned int* neighborFX,
+                                                      unsigned int* neighborFY,
+                                                      unsigned int* neighborFZ,
+                                                      unsigned long long numberOfLBnodesC,
+                                                      unsigned long long numberOfLBnodesF,
+                                                      bool isEvenTimestep,
+                                                      unsigned int* posCSWB,
+                                                      unsigned int* posFSWB,
+                                                      unsigned int kCF,
+                                                      real omCoarse,
+                                                      real omFine,
+                                                      real nu,
+                                                      unsigned int nxC,
+                                                      unsigned int nyC,
+                                                      unsigned int nxF,
+                                                      unsigned int nyF,
+                                                      OffCF offCF);
 
 __global__ void scaleCF_comp_D3Q27F3( real* DC,
-												 real* DF,
-												 real* G6,
-												 unsigned int* neighborCX,
-												 unsigned int* neighborCY,
-												 unsigned int* neighborCZ,
-												 unsigned int* neighborFX,
-												 unsigned int* neighborFY,
-												 unsigned int* neighborFZ,
-												 unsigned int size_MatC,
-												 unsigned int size_MatF,
-												 bool isEvenTimestep,
-												 unsigned int* posCSWB,
-												 unsigned int* posFSWB,
-												 unsigned int kCF,
-												 real omCoarse,
-												 real omFine,
-												 real nu,
-												 unsigned int nxC,
-												 unsigned int nyC,
-												 unsigned int nxF,
-												 unsigned int nyF,
-												 OffCF offCF);
+                                                 real* DF,
+                                                 real* G6,
+                                                 unsigned int* neighborCX,
+                                                 unsigned int* neighborCY,
+                                                 unsigned int* neighborCZ,
+                                                 unsigned int* neighborFX,
+                                                 unsigned int* neighborFY,
+                                                 unsigned int* neighborFZ,
+                                                 unsigned long long numberOfLBnodesC,
+                                                 unsigned long long numberOfLBnodesF,
+                                                 bool isEvenTimestep,
+                                                 unsigned int* posCSWB,
+                                                 unsigned int* posFSWB,
+                                                 unsigned int kCF,
+                                                 real omCoarse,
+                                                 real omFine,
+                                                 real nu,
+                                                 unsigned int nxC,
+                                                 unsigned int nyC,
+                                                 unsigned int nxF,
+                                                 unsigned int nyF,
+                                                 OffCF offCF);
 
 
 __global__ void scaleCF_staggered_time_comp_27(real* DC,
-														  real* DF,
-														  unsigned int* neighborCX,
-														  unsigned int* neighborCY,
-														  unsigned int* neighborCZ,
-														  unsigned int* neighborFX,
-														  unsigned int* neighborFY,
-														  unsigned int* neighborFZ,
-														  unsigned int size_MatC,
-														  unsigned int size_MatF,
-														  bool isEvenTimestep,
-														  unsigned int* posCSWB,
-														  unsigned int* posFSWB,
-														  unsigned int kCF,
-														  real omCoarse,
-														  real omFine,
-														  real nu,
-														  unsigned int nxC,
-														  unsigned int nyC,
-														  unsigned int nxF,
-														  unsigned int nyF,
-														  OffCF offCF);
+                                                          real* DF,
+                                                          unsigned int* neighborCX,
+                                                          unsigned int* neighborCY,
+                                                          unsigned int* neighborCZ,
+                                                          unsigned int* neighborFX,
+                                                          unsigned int* neighborFY,
+                                                          unsigned int* neighborFZ,
+                                                          unsigned long long numberOfLBnodesC,
+                                                          unsigned long long numberOfLBnodesF,
+                                                          bool isEvenTimestep,
+                                                          unsigned int* posCSWB,
+                                                          unsigned int* posFSWB,
+                                                          unsigned int kCF,
+                                                          real omCoarse,
+                                                          real omFine,
+                                                          real nu,
+                                                          unsigned int nxC,
+                                                          unsigned int nyC,
+                                                          unsigned int nxF,
+                                                          unsigned int nyF,
+                                                          OffCF offCF);
 
 __global__ void scaleCF_RhoSq_comp_27( real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_compressible(
-    real* distributionsCoarse, 
-    real* distributionsFine, 
+    real* distributionsCoarse,
+    real* distributionsFine,
     unsigned int* neighborXcoarse,
     unsigned int* neighborYcoarse,
     unsigned int* neighborZcoarse,
     unsigned int* neighborXfine,
     unsigned int* neighborYfine,
     unsigned int* neighborZfine,
-    unsigned int numberOfLBnodesCoarse, 
-    unsigned int numberOfLBnodesFine, 
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
     bool isEvenTimestep,
-    unsigned int* indicesCoarseMMM, 
-    unsigned int* indicesFineMMM, 
-    unsigned int numberOfInterfaceNodes, 
-    real omegaCoarse, 
-    real omegaFine, 
+    unsigned int* indicesCoarseMMM,
+    unsigned int* indicesFineMMM,
+    unsigned int numberOfInterfaceNodes,
+    real omegaCoarse,
+    real omegaFine,
     OffCF offsetCF);
 
 __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
-														real* DF,
-														unsigned int* neighborCX,
-														unsigned int* neighborCY,
-														unsigned int* neighborCZ,
-														unsigned int* neighborFX,
-														unsigned int* neighborFY,
-														unsigned int* neighborFZ,
-														unsigned int size_MatC,
-														unsigned int size_MatF,
-														bool isEvenTimestep,
-														unsigned int* posCSWB,
-														unsigned int* posFSWB,
-														unsigned int kCF,
-														real omCoarse,
-														real omFine,
-														real nu,
-														unsigned int nxC,
-														unsigned int nyC,
-														unsigned int nxF,
-														unsigned int nyF,
-														OffCF offCF);
+                                                        real* DF,
+                                                        unsigned int* neighborCX,
+                                                        unsigned int* neighborCY,
+                                                        unsigned int* neighborCZ,
+                                                        unsigned int* neighborFX,
+                                                        unsigned int* neighborFY,
+                                                        unsigned int* neighborFZ,
+                                                        unsigned long long numberOfLBnodesC,
+                                                        unsigned long long numberOfLBnodesF,
+                                                        bool isEvenTimestep,
+                                                        unsigned int* posCSWB,
+                                                        unsigned int* posFSWB,
+                                                        unsigned int kCF,
+                                                        real omCoarse,
+                                                        real omFine,
+                                                        real nu,
+                                                        unsigned int nxC,
+                                                        unsigned int nyC,
+                                                        unsigned int nxF,
+                                                        unsigned int nyF,
+                                                        OffCF offCF);
 
 __global__ void scaleCF_AA2016_comp_27(real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_NSPress_27(real* DC,
-											  real* DF,
-											  unsigned int* neighborCX,
-											  unsigned int* neighborCY,
-											  unsigned int* neighborCZ,
-											  unsigned int* neighborFX,
-											  unsigned int* neighborFY,
-											  unsigned int* neighborFZ,
-											  unsigned int size_MatC,
-											  unsigned int size_MatF,
-											  bool isEvenTimestep,
-											  unsigned int* posCSWB,
-											  unsigned int* posFSWB,
-											  unsigned int kCF,
-											  real omCoarse,
-											  real omFine,
-											  real nu,
-											  unsigned int nxC,
-											  unsigned int nyC,
-											  unsigned int nxF,
-											  unsigned int nyF,
-											  OffCF offCF);
+                                              real* DF,
+                                              unsigned int* neighborCX,
+                                              unsigned int* neighborCY,
+                                              unsigned int* neighborCZ,
+                                              unsigned int* neighborFX,
+                                              unsigned int* neighborFY,
+                                              unsigned int* neighborFZ,
+                                              unsigned long long numberOfLBnodesC,
+                                              unsigned long long numberOfLBnodesF,
+                                              bool isEvenTimestep,
+                                              unsigned int* posCSWB,
+                                              unsigned int* posFSWB,
+                                              unsigned int kCF,
+                                              real omCoarse,
+                                              real omFine,
+                                              real nu,
+                                              unsigned int nxC,
+                                              unsigned int nyC,
+                                              unsigned int nxF,
+                                              unsigned int nyF,
+                                              OffCF offCF);
 
 __global__ void scaleCFThSMG7( real* DC,
                                           real* DF,
@@ -1835,8 +1958,8 @@ __global__ void scaleCFThSMG7( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
@@ -1855,8 +1978,8 @@ __global__ void scaleCFThS7(real* DC,
                                        unsigned int* neighborFX,
                                        unsigned int* neighborFY,
                                        unsigned int* neighborFZ,
-                                       unsigned int size_MatC,
-                                       unsigned int size_MatF,
+                                       unsigned long long numberOfLBnodesC,
+                                       unsigned long long numberOfLBnodesF,
                                        bool isEvenTimestep,
                                        unsigned int* posCSWB,
                                        unsigned int* posFSWB,
@@ -1874,15 +1997,15 @@ __global__ void scaleCFThS27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-                                        unsigned int size_MatC,
-                                        unsigned int size_MatF,
+                                        unsigned long long numberOfLBnodesC,
+                                        unsigned long long numberOfLBnodesF,
                                         bool isEvenTimestep,
                                         unsigned int* posCSWB,
                                         unsigned int* posFSWB,
                                         unsigned int kCF,
                                         real nu,
                                         real diffusivity_fine,
-										OffCF offCF);
+                                        OffCF offCF);
 
 //fine to coarse
 __global__ void scaleFC27(real* DC,
@@ -1893,18 +2016,18 @@ __global__ void scaleFC27(real* DC,
                                     unsigned int* neighborFX,
                                     unsigned int* neighborFY,
                                     unsigned int* neighborFZ,
-										       unsigned int size_MatC,
-										       unsigned int size_MatF,
-										       bool isEvenTimestep,
+                                               unsigned long long numberOfLBnodesC,
+                                               unsigned long long numberOfLBnodesF,
+                                               bool isEvenTimestep,
                                      unsigned int* posC,
                                      unsigned int* posFSWB,
                                      unsigned int kFC,
-										       real omCoarse,
-										       real omFine,
-										       real nu,
-										       unsigned int nxC,
-										       unsigned int nyC,
-										       unsigned int nxF,
+                                               real omCoarse,
+                                               real omFine,
+                                               real nu,
+                                               unsigned int nxC,
+                                               unsigned int nyC,
+                                               unsigned int nxF,
                                      unsigned int nyF);
 
 __global__ void scaleFCEff27(real* DC,
@@ -1915,8 +2038,8 @@ __global__ void scaleFCEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-                                        unsigned int size_MatC,
-                                        unsigned int size_MatF,
+                                        unsigned long long numberOfLBnodesC,
+                                        unsigned long long numberOfLBnodesF,
                                         bool isEvenTimestep,
                                         unsigned int* posC,
                                         unsigned int* posFSWB,
@@ -1938,8 +2061,8 @@ __global__ void scaleFCLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
@@ -1961,8 +2084,8 @@ __global__ void scaleFCpress27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
@@ -1984,8 +2107,8 @@ __global__ void scaleFC_Fix_27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
@@ -2000,145 +2123,145 @@ __global__ void scaleFC_Fix_27( real* DC,
                                           OffFC offFC);
 
 __global__ void scaleFC_Fix_comp_27(   real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_0817_comp_27(  real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
-													  real* DF,
-													  real* G6,
-													  unsigned int* neighborCX,
-													  unsigned int* neighborCY,
-													  unsigned int* neighborCZ,
-													  unsigned int* neighborFX,
-													  unsigned int* neighborFY,
-													  unsigned int* neighborFZ,
-													  unsigned int size_MatC,
-													  unsigned int size_MatF,
-													  bool isEvenTimestep,
-													  unsigned int* posC,
-													  unsigned int* posFSWB,
-													  unsigned int kFC,
-													  real omCoarse,
-													  real omFine,
-													  real nu,
-													  unsigned int nxC,
-													  unsigned int nyC,
-													  unsigned int nxF,
-													  unsigned int nyF,
-													  OffFC offFC);
+                                                      real* DF,
+                                                      real* G6,
+                                                      unsigned int* neighborCX,
+                                                      unsigned int* neighborCY,
+                                                      unsigned int* neighborCZ,
+                                                      unsigned int* neighborFX,
+                                                      unsigned int* neighborFY,
+                                                      unsigned int* neighborFZ,
+                                                      unsigned long long numberOfLBnodesC,
+                                                      unsigned long long numberOfLBnodesF,
+                                                      bool isEvenTimestep,
+                                                      unsigned int* posC,
+                                                      unsigned int* posFSWB,
+                                                      unsigned int kFC,
+                                                      real omCoarse,
+                                                      real omFine,
+                                                      real nu,
+                                                      unsigned int nxC,
+                                                      unsigned int nyC,
+                                                      unsigned int nxF,
+                                                      unsigned int nyF,
+                                                      OffFC offFC);
 
 __global__ void scaleFC_comp_D3Q27F3( real* DC,
-												 real* DF,
-												 real* G6,
-												 unsigned int* neighborCX,
-												 unsigned int* neighborCY,
-												 unsigned int* neighborCZ,
-												 unsigned int* neighborFX,
-												 unsigned int* neighborFY,
-												 unsigned int* neighborFZ,
-												 unsigned int size_MatC,
-												 unsigned int size_MatF,
-												 bool isEvenTimestep,
-												 unsigned int* posC,
-												 unsigned int* posFSWB,
-												 unsigned int kFC,
-												 real omCoarse,
-												 real omFine,
-												 real nu,
-												 unsigned int nxC,
-												 unsigned int nyC,
-												 unsigned int nxF,
-												 unsigned int nyF,
-												 OffFC offFC);
+                                                 real* DF,
+                                                 real* G6,
+                                                 unsigned int* neighborCX,
+                                                 unsigned int* neighborCY,
+                                                 unsigned int* neighborCZ,
+                                                 unsigned int* neighborFX,
+                                                 unsigned int* neighborFY,
+                                                 unsigned int* neighborFZ,
+                                                 unsigned long long numberOfLBnodesC,
+                                                 unsigned long long numberOfLBnodesF,
+                                                 bool isEvenTimestep,
+                                                 unsigned int* posC,
+                                                 unsigned int* posFSWB,
+                                                 unsigned int kFC,
+                                                 real omCoarse,
+                                                 real omFine,
+                                                 real nu,
+                                                 unsigned int nxC,
+                                                 unsigned int nyC,
+                                                 unsigned int nxF,
+                                                 unsigned int nyF,
+                                                 OffFC offFC);
 
 
 __global__ void scaleFC_staggered_time_comp_27(real* DC,
-														  real* DF,
-														  unsigned int* neighborCX,
-														  unsigned int* neighborCY,
-														  unsigned int* neighborCZ,
-														  unsigned int* neighborFX,
-														  unsigned int* neighborFY,
-														  unsigned int* neighborFZ,
-														  unsigned int size_MatC,
-														  unsigned int size_MatF,
-														  bool isEvenTimestep,
-														  unsigned int* posC,
-														  unsigned int* posFSWB,
-														  unsigned int kFC,
-														  real omCoarse,
-														  real omFine,
-														  real nu,
-														  unsigned int nxC,
-														  unsigned int nyC,
-														  unsigned int nxF,
-														  unsigned int nyF,
-														  OffFC offFC);
+                                                          real* DF,
+                                                          unsigned int* neighborCX,
+                                                          unsigned int* neighborCY,
+                                                          unsigned int* neighborCZ,
+                                                          unsigned int* neighborFX,
+                                                          unsigned int* neighborFY,
+                                                          unsigned int* neighborFZ,
+                                                          unsigned long long numberOfLBnodesC,
+                                                          unsigned long long numberOfLBnodesF,
+                                                          bool isEvenTimestep,
+                                                          unsigned int* posC,
+                                                          unsigned int* posFSWB,
+                                                          unsigned int kFC,
+                                                          real omCoarse,
+                                                          real omFine,
+                                                          real nu,
+                                                          unsigned int nxC,
+                                                          unsigned int nyC,
+                                                          unsigned int nxF,
+                                                          unsigned int nyF,
+                                                          OffFC offFC);
 
 __global__ void scaleFC_RhoSq_comp_27( real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_compressible(
     real *distributionsCoarse,
@@ -2149,8 +2272,8 @@ __global__ void scaleFC_compressible(
     unsigned int *neighborXfine,
     unsigned int *neighborYfine,
     unsigned int *neighborZfine,
-    unsigned int numberOfLBnodesCoarse,
-    unsigned int numberOfLBnodesFine,
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
     bool isEvenTimestep,
     unsigned int *indicesCoarse000,
     unsigned int *indicesFineMMM,
@@ -2160,73 +2283,73 @@ __global__ void scaleFC_compressible(
     OffFC offsetFC);
 
 __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
-														real* DF,
-														unsigned int* neighborCX,
-														unsigned int* neighborCY,
-														unsigned int* neighborCZ,
-														unsigned int* neighborFX,
-														unsigned int* neighborFY,
-														unsigned int* neighborFZ,
-														unsigned int size_MatC,
-														unsigned int size_MatF,
-														bool isEvenTimestep,
-														unsigned int* posC,
-														unsigned int* posFSWB,
-														unsigned int kFC,
-														real omCoarse,
-														real omFine,
-														real nu,
-														unsigned int nxC,
-														unsigned int nyC,
-														unsigned int nxF,
-														unsigned int nyF,
-														OffFC offFC);
+                                                        real* DF,
+                                                        unsigned int* neighborCX,
+                                                        unsigned int* neighborCY,
+                                                        unsigned int* neighborCZ,
+                                                        unsigned int* neighborFX,
+                                                        unsigned int* neighborFY,
+                                                        unsigned int* neighborFZ,
+                                                        unsigned long long numberOfLBnodesC,
+                                                        unsigned long long numberOfLBnodesF,
+                                                        bool isEvenTimestep,
+                                                        unsigned int* posC,
+                                                        unsigned int* posFSWB,
+                                                        unsigned int kFC,
+                                                        real omCoarse,
+                                                        real omFine,
+                                                        real nu,
+                                                        unsigned int nxC,
+                                                        unsigned int nyC,
+                                                        unsigned int nxF,
+                                                        unsigned int nyF,
+                                                        OffFC offFC);
 
 __global__ void scaleFC_AA2016_comp_27(real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_NSPress_27(real* DC,
-											  real* DF,
-											  unsigned int* neighborCX,
-											  unsigned int* neighborCY,
-											  unsigned int* neighborCZ,
-											  unsigned int* neighborFX,
-											  unsigned int* neighborFY,
-											  unsigned int* neighborFZ,
-											  unsigned int size_MatC,
-											  unsigned int size_MatF,
-											  bool isEvenTimestep,
-											  unsigned int* posC,
-											  unsigned int* posFSWB,
-											  unsigned int kFC,
-											  real omCoarse,
-											  real omFine,
-											  real nu,
-											  unsigned int nxC,
-											  unsigned int nyC,
-											  unsigned int nxF,
-											  unsigned int nyF,
-											  OffFC offFC);
+                                              real* DF,
+                                              unsigned int* neighborCX,
+                                              unsigned int* neighborCY,
+                                              unsigned int* neighborCZ,
+                                              unsigned int* neighborFX,
+                                              unsigned int* neighborFY,
+                                              unsigned int* neighborFZ,
+                                              unsigned long long numberOfLBnodesC,
+                                              unsigned long long numberOfLBnodesF,
+                                              bool isEvenTimestep,
+                                              unsigned int* posC,
+                                              unsigned int* posFSWB,
+                                              unsigned int kFC,
+                                              real omCoarse,
+                                              real omFine,
+                                              real nu,
+                                              unsigned int nxC,
+                                              unsigned int nyC,
+                                              unsigned int nxF,
+                                              unsigned int nyF,
+                                              OffFC offFC);
 
 __global__ void scaleFCThSMG7( real* DC,
                                           real* DF,
@@ -2238,8 +2361,8 @@ __global__ void scaleFCThSMG7( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
@@ -2258,8 +2381,8 @@ __global__ void scaleFCThS7(real* DC,
                                        unsigned int* neighborFX,
                                        unsigned int* neighborFY,
                                        unsigned int* neighborFZ,
-                                       unsigned int size_MatC,
-                                       unsigned int size_MatF,
+                                       unsigned long long numberOfLBnodesC,
+                                       unsigned long long numberOfLBnodesF,
                                        bool isEvenTimestep,
                                        unsigned int* posC,
                                        unsigned int* posFSWB,
@@ -2277,242 +2400,242 @@ __global__ void scaleFCThS27(  real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
                                           unsigned int kFC,
                                           real nu,
                                           real diffusivity_coarse,
-										  OffFC offFC);
+                                          OffFC offFC);
 
 __global__ void DragLiftPost27(  real* DD,
-											int* k_Q,
-											real* QQ,
-											int numberOfBCnodes,
-											double *DragX,
-											double *DragY,
-											double *DragZ,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            int* k_Q,
+                                            real* QQ,
+                                            int numberOfBCnodes,
+                                            double *DragX,
+                                            double *DragY,
+                                            double *DragZ,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void DragLiftPre27(   real* DD,
-											int* k_Q,
-											real* QQ,
-											int numberOfBCnodes,
-											double *DragX,
-											double *DragY,
-											double *DragZ,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            int* k_Q,
+                                            real* QQ,
+                                            int numberOfBCnodes,
+                                            double *DragX,
+                                            double *DragY,
+                                            double *DragZ,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void CalcCP27(real* DD,
-									int* cpIndex,
-									int nonCp,
-									double *cpPress,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep);
+                                    int* cpIndex,
+                                    int nonCp,
+                                    double *cpPress,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned long long numberOfLBnodes,
+                                    bool isEvenTimestep);
 
 __global__ void getSendFsPre27(real* DD,
-										  real* bufferFs,
-										  int* sendIndex,
+                                          real* bufferFs,
+                                          int* sendIndex,
                                           int buffmax,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 __global__ void getSendFsPost27(real* DD,
-										   real* bufferFs,
-										   int* sendIndex,
+                                           real* bufferFs,
+                                           int* sendIndex,
                                            int buffmax,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void setRecvFsPre27(real* DD,
-										  real* bufferFs,
-										  int* recvIndex,
+                                          real* bufferFs,
+                                          int* recvIndex,
                                           int buffmax,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 __global__ void setRecvFsPost27(real* DD,
-										   real* bufferFs,
-										   int* recvIndex,
+                                           real* bufferFs,
+                                           int* recvIndex,
                                            int buffmax,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void getSendGsF3(
-	real* G6,
-	real* bufferGs,
-	int* sendIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* G6,
+    real* bufferGs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void setRecvGsF3(
-	real* G6,
-	real* bufferGs,
-	int* recvIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* G6,
+    real* bufferGs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void WallFunction27( 	real* vx,
-											real* vy,
-											real* vz,
-											real* DD,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* vy,
+                                            real* vz,
+                                            real* DD,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned long long numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void LBSetOutputWallVelocitySP27( real* vxD,
-														real* vyD,
-														real* vzD,
-														real* vxWall,
-														real* vyWall,
-														real* vzWall,
-														int numberOfWallNodes,
-														int* kWallNodes,
-														real* rhoD,
-														real* pressD,
-														unsigned int* geoD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int size_Mat,
-														real* DD,
-														bool isEvenTimestep);
+                                                        real* vyD,
+                                                        real* vzD,
+                                                        real* vxWall,
+                                                        real* vyWall,
+                                                        real* vzWall,
+                                                        int numberOfWallNodes,
+                                                        int* kWallNodes,
+                                                        real* rhoD,
+                                                        real* pressD,
+                                                        unsigned int* geoD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        unsigned long long numberOfLBnodes,
+                                                        real* DD,
+                                                        bool isEvenTimestep);
 
 __global__ void GetVeloforForcing27( real* DD,
-												int* bcIndex,
-												int nonAtBC,
-												real* Vx,
-												real* Vy,
-												real* Vz,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                int* bcIndex,
+                                                int nonAtBC,
+                                                real* Vx,
+                                                real* Vy,
+                                                real* Vz,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
 
 __global__ void InitParticles( real* coordX,
-										  real* coordY,
-										  real* coordZ,
-										  real* coordParticleXlocal,
-										  real* coordParticleYlocal,
-										  real* coordParticleZlocal,
-										  real* coordParticleXglobal,
-										  real* coordParticleYglobal,
-										  real* coordParticleZglobal,
-										  real* veloParticleX,
-										  real* veloParticleY,
-										  real* veloParticleZ,
-										  real* randArray,
-										  unsigned int* particleID,
-										  unsigned int* cellBaseID,
-										  unsigned int* bcMatD,
-										  unsigned int* neighborX,
-										  unsigned int* neighborY,
-										  unsigned int* neighborZ,
-										  unsigned int* neighborWSB,
-										  int level,
-									      unsigned int numberOfParticles,
-										  unsigned int size_Mat);
+                                          real* coordY,
+                                          real* coordZ,
+                                          real* coordParticleXlocal,
+                                          real* coordParticleYlocal,
+                                          real* coordParticleZlocal,
+                                          real* coordParticleXglobal,
+                                          real* coordParticleYglobal,
+                                          real* coordParticleZglobal,
+                                          real* veloParticleX,
+                                          real* veloParticleY,
+                                          real* veloParticleZ,
+                                          real* randArray,
+                                          unsigned int* particleID,
+                                          unsigned int* cellBaseID,
+                                          unsigned int* bcMatD,
+                                          unsigned int* neighborX,
+                                          unsigned int* neighborY,
+                                          unsigned int* neighborZ,
+                                          unsigned int* neighborWSB,
+                                          int level,
+                                          unsigned int numberOfParticles,
+                                          unsigned long long numberOfLBnodes);
 
 __global__ void MoveParticles( real* coordX,
-										  real* coordY,
-										  real* coordZ,
-										  real* coordParticleXlocal,
-										  real* coordParticleYlocal,
-										  real* coordParticleZlocal,
-										  real* coordParticleXglobal,
-										  real* coordParticleYglobal,
-										  real* coordParticleZglobal,
-										  real* veloParticleX,
-										  real* veloParticleY,
-										  real* veloParticleZ,
-										  real* DD,
-										  real  omega,
-										  unsigned int* particleID,
-										  unsigned int* cellBaseID,
-										  unsigned int* bcMatD,
-										  unsigned int* neighborX,
-										  unsigned int* neighborY,
-										  unsigned int* neighborZ,
-										  unsigned int* neighborWSB,
-										  int level,
-										  unsigned int timestep,
-										  unsigned int numberOfTimesteps,
-									      unsigned int numberOfParticles,
-										  unsigned int size_Mat,
-										  bool isEvenTimestep);
+                                          real* coordY,
+                                          real* coordZ,
+                                          real* coordParticleXlocal,
+                                          real* coordParticleYlocal,
+                                          real* coordParticleZlocal,
+                                          real* coordParticleXglobal,
+                                          real* coordParticleYglobal,
+                                          real* coordParticleZglobal,
+                                          real* veloParticleX,
+                                          real* veloParticleY,
+                                          real* veloParticleZ,
+                                          real* DD,
+                                          real  omega,
+                                          unsigned int* particleID,
+                                          unsigned int* cellBaseID,
+                                          unsigned int* bcMatD,
+                                          unsigned int* neighborX,
+                                          unsigned int* neighborY,
+                                          unsigned int* neighborZ,
+                                          unsigned int* neighborWSB,
+                                          int level,
+                                          unsigned int timestep,
+                                          unsigned int numberOfTimesteps,
+                                          unsigned int numberOfParticles,
+                                          unsigned long long numberOfLBnodes,
+                                          bool isEvenTimestep);
 
 __global__ void MoveParticlesWithoutBCs(   real* coordX,
-													  real* coordY,
-													  real* coordZ,
-													  real* coordParticleXlocal,
-													  real* coordParticleYlocal,
-													  real* coordParticleZlocal,
-													  real* coordParticleXglobal,
-													  real* coordParticleYglobal,
-													  real* coordParticleZglobal,
-													  real* veloParticleX,
-													  real* veloParticleY,
-													  real* veloParticleZ,
-													  real* DD,
-													  real  omega,
-													  unsigned int* particleID,
-													  unsigned int* cellBaseID,
-													  unsigned int* bcMatD,
-													  unsigned int* neighborX,
-													  unsigned int* neighborY,
-													  unsigned int* neighborZ,
-													  unsigned int* neighborWSB,
-													  int level,
-													  unsigned int timestep,
-													  unsigned int numberOfTimesteps,
-													  unsigned int numberOfParticles,
-													  unsigned int size_Mat,
-													  bool isEvenTimestep);
+                                                      real* coordY,
+                                                      real* coordZ,
+                                                      real* coordParticleXlocal,
+                                                      real* coordParticleYlocal,
+                                                      real* coordParticleZlocal,
+                                                      real* coordParticleXglobal,
+                                                      real* coordParticleYglobal,
+                                                      real* coordParticleZglobal,
+                                                      real* veloParticleX,
+                                                      real* veloParticleY,
+                                                      real* veloParticleZ,
+                                                      real* DD,
+                                                      real  omega,
+                                                      unsigned int* particleID,
+                                                      unsigned int* cellBaseID,
+                                                      unsigned int* bcMatD,
+                                                      unsigned int* neighborX,
+                                                      unsigned int* neighborY,
+                                                      unsigned int* neighborZ,
+                                                      unsigned int* neighborWSB,
+                                                      int level,
+                                                      unsigned int timestep,
+                                                      unsigned int numberOfTimesteps,
+                                                      unsigned int numberOfParticles,
+                                                      unsigned long long numberOfLBnodes,
+                                                      bool isEvenTimestep);
 
 __global__ void initRandom(curandState* state);
 
 __global__ void generateRandomValues(curandState* state,
-												real* randArray);
+                                                real* randArray);
 
 __global__ void CalcTurbulenceIntensity(
    real* vxx,
@@ -2529,7 +2652,7 @@ __global__ void CalcTurbulenceIntensity(
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
-   unsigned int size_Mat,
+   unsigned long long numberOfLBnodes,
    bool isEvenTimestep);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu
index 619d68c87d7a707e70be4c56d434191994144148..641d6519669b1522430fe88990c00d0630d00e9b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu
@@ -22,8 +22,8 @@ __global__ void scaleCF_0817_comp_27( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -43,33 +43,33 @@ __global__ void scaleCF_0817_comp_27( real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[DIR_P00   *size_MatF];
-	fM00dest = &DF[DIR_M00   *size_MatF];
-	f0P0dest = &DF[DIR_0P0   *size_MatF];
-	f0M0dest = &DF[DIR_0M0   *size_MatF];
-	f00Pdest = &DF[DIR_00P   *size_MatF];
-	f00Mdest = &DF[DIR_00M   *size_MatF];
-	fPP0dest = &DF[DIR_PP0  *size_MatF];
-	fMM0dest = &DF[DIR_MM0  *size_MatF];
-	fPM0dest = &DF[DIR_PM0  *size_MatF];
-	fMP0dest = &DF[DIR_MP0  *size_MatF];
-	fP0Pdest = &DF[DIR_P0P  *size_MatF];
-	fM0Mdest = &DF[DIR_M0M  *size_MatF];
-	fP0Mdest = &DF[DIR_P0M  *size_MatF];
-	fM0Pdest = &DF[DIR_M0P  *size_MatF];
-	f0PPdest = &DF[DIR_0PP  *size_MatF];
-	f0MMdest = &DF[DIR_0MM  *size_MatF];
-	f0PMdest = &DF[DIR_0PM  *size_MatF];
-	f0MPdest = &DF[DIR_0MP  *size_MatF];
-	f000dest = &DF[DIR_000*size_MatF];
-	fMMMdest = &DF[DIR_MMM *size_MatF];
-	fMMPdest = &DF[DIR_MMP *size_MatF];
-	fMPPdest = &DF[DIR_MPP *size_MatF];
-	fMPMdest = &DF[DIR_MPM *size_MatF];
-	fPPMdest = &DF[DIR_PPM *size_MatF];
-	fPPPdest = &DF[DIR_PPP *size_MatF];
-	fPMPdest = &DF[DIR_PMP *size_MatF];
-	fPMMdest = &DF[DIR_PMM *size_MatF];
+	fP00dest = &DF[DIR_P00 * numberOfLBnodesFine];
+	fM00dest = &DF[DIR_M00 * numberOfLBnodesFine];
+	f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine];
+	f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine];
+	f00Pdest = &DF[DIR_00P * numberOfLBnodesFine];
+	f00Mdest = &DF[DIR_00M * numberOfLBnodesFine];
+	fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine];
+	fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine];
+	fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine];
+	fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine];
+	fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine];
+	fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine];
+	fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine];
+	fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine];
+	f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine];
+	f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine];
+	f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine];
+	f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine];
+	f000dest = &DF[DIR_000 * numberOfLBnodesFine];
+	fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine];
+	fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine];
+	fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine];
+	fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine];
+	fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine];
+	fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine];
+	fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine];
+	fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -78,63 +78,63 @@ __global__ void scaleCF_0817_comp_27( real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[DIR_P00   *size_MatC];
-		fM00source = &DC[DIR_M00   *size_MatC];
-		f0P0source = &DC[DIR_0P0   *size_MatC];
-		f0M0source = &DC[DIR_0M0   *size_MatC];
-		f00Psource = &DC[DIR_00P   *size_MatC];
-		f00Msource = &DC[DIR_00M   *size_MatC];
-		fPP0source = &DC[DIR_PP0  *size_MatC];
-		fMM0source = &DC[DIR_MM0  *size_MatC];
-		fPM0source = &DC[DIR_PM0  *size_MatC];
-		fMP0source = &DC[DIR_MP0  *size_MatC];
-		fP0Psource = &DC[DIR_P0P  *size_MatC];
-		fM0Msource = &DC[DIR_M0M  *size_MatC];
-		fP0Msource = &DC[DIR_P0M  *size_MatC];
-		fM0Psource = &DC[DIR_M0P  *size_MatC];
-		f0PPsource = &DC[DIR_0PP  *size_MatC];
-		f0MMsource = &DC[DIR_0MM  *size_MatC];
-		f0PMsource = &DC[DIR_0PM  *size_MatC];
-		f0MPsource = &DC[DIR_0MP  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_MMM *size_MatC];
-		fMMPsource = &DC[DIR_MMP *size_MatC];
-		fMPPsource = &DC[DIR_MPP *size_MatC];
-		fMPMsource = &DC[DIR_MPM *size_MatC];
-		fPPMsource = &DC[DIR_PPM *size_MatC];
-		fPPPsource = &DC[DIR_PPP *size_MatC];
-		fPMPsource = &DC[DIR_PMP *size_MatC];
-		fPMMsource = &DC[DIR_PMM *size_MatC];
+		fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
 	}
 	else
 	{
-		fP00source = &DC[DIR_M00   *size_MatC];
-		fM00source = &DC[DIR_P00   *size_MatC];
-		f0P0source = &DC[DIR_0M0   *size_MatC];
-		f0M0source = &DC[DIR_0P0   *size_MatC];
-		f00Psource = &DC[DIR_00M   *size_MatC];
-		f00Msource = &DC[DIR_00P   *size_MatC];
-		fPP0source = &DC[DIR_MM0  *size_MatC];
-		fMM0source = &DC[DIR_PP0  *size_MatC];
-		fPM0source = &DC[DIR_MP0  *size_MatC];
-		fMP0source = &DC[DIR_PM0  *size_MatC];
-		fP0Psource = &DC[DIR_M0M  *size_MatC];
-		fM0Msource = &DC[DIR_P0P  *size_MatC];
-		fP0Msource = &DC[DIR_M0P  *size_MatC];
-		fM0Psource = &DC[DIR_P0M  *size_MatC];
-		f0PPsource = &DC[DIR_0MM  *size_MatC];
-		f0MMsource = &DC[DIR_0PP  *size_MatC];
-		f0PMsource = &DC[DIR_0MP  *size_MatC];
-		f0MPsource = &DC[DIR_0PM  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_PPP *size_MatC];
-		fMMPsource = &DC[DIR_PPM *size_MatC];
-		fMPPsource = &DC[DIR_PMM *size_MatC];
-		fMPMsource = &DC[DIR_PMP *size_MatC];
-		fPPMsource = &DC[DIR_MMP *size_MatC];
-		fPPPsource = &DC[DIR_MMM *size_MatC];
-		fPMPsource = &DC[DIR_MPM *size_MatC];
-		fPMMsource = &DC[DIR_MPP *size_MatC];
+		fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
 	}
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -4091,8 +4091,8 @@ __global__ void scaleCF_AA2016_comp_27(real* DC,
 												  unsigned int* neighborFX,
 												  unsigned int* neighborFY,
 												  unsigned int* neighborFZ,
-												  unsigned int size_MatC, 
-												  unsigned int size_MatF, 
+												  unsigned long long numberOfLBnodesCoarse, 
+												  unsigned long long numberOfLBnodesFine, 
 												  bool isEvenTimestep,
 												  unsigned int* posCSWB, 
 												  unsigned int* posFSWB, 
@@ -4109,96 +4109,96 @@ __global__ void scaleCF_AA2016_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -10974,8 +10974,8 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int* neighborFX,
 														unsigned int* neighborFY,
 														unsigned int* neighborFZ,
-														unsigned int size_MatC, 
-														unsigned int size_MatF, 
+														unsigned long long numberOfLBnodesCoarse, 
+														unsigned long long numberOfLBnodesFine, 
 														bool isEvenTimestep,
 														unsigned int* posCSWB, 
 														unsigned int* posFSWB, 
@@ -10992,96 +10992,96 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -17849,8 +17849,8 @@ __global__ void scaleCF_RhoSq_comp_27(real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -17867,96 +17867,96 @@ __global__ void scaleCF_RhoSq_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -22133,8 +22133,8 @@ __global__ void scaleCF_staggered_time_comp_27(   real* DC,
 															 unsigned int* neighborFX,
 															 unsigned int* neighborFY,
 															 unsigned int* neighborFZ,
-															 unsigned int size_MatC, 
-															 unsigned int size_MatF, 
+															 unsigned long long numberOfLBnodesCoarse, 
+															 unsigned long long numberOfLBnodesFine, 
 															 bool isEvenTimestep,
 															 unsigned int* posCSWB, 
 															 unsigned int* posFSWB, 
@@ -22151,96 +22151,96 @@ __global__ void scaleCF_staggered_time_comp_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -26369,8 +26369,8 @@ __global__ void scaleCF_Fix_comp_27(  real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -26387,96 +26387,96 @@ __global__ void scaleCF_Fix_comp_27(  real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -31136,8 +31136,8 @@ __global__ void scaleCF_NSPress_27(   real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -31154,96 +31154,96 @@ __global__ void scaleCF_NSPress_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -35080,8 +35080,8 @@ __global__ void scaleCF_Fix_27(   real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
@@ -35098,96 +35098,96 @@ __global__ void scaleCF_Fix_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -39338,8 +39338,8 @@ __global__ void scaleCFpress27(   real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
@@ -39356,96 +39356,96 @@ __global__ void scaleCFpress27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -41012,8 +41012,8 @@ __global__ void scaleCFLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB, 
                                           unsigned int* posFSWB, 
@@ -41030,96 +41030,96 @@ __global__ void scaleCFLast27( real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -43249,8 +43249,8 @@ __global__ void scaleCFThSMG7(    real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
@@ -43261,128 +43261,128 @@ __global__ void scaleCFThSMG7(    real* DC,
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   //fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
                       
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -44476,8 +44476,8 @@ __global__ void scaleCFThS7(   real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB, 
                                           unsigned int* posFSWB, 
@@ -44487,128 +44487,128 @@ __global__ void scaleCFThS7(   real* DC,
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   //fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
                       
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -45599,8 +45599,8 @@ __global__ void scaleCFThS27(     real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
@@ -45611,188 +45611,188 @@ __global__ void scaleCFThS27(     real* DC,
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   //fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
 
    Distributions27 D27F;
-   D27F.f[DIR_P00   ] = &DD27F[DIR_P00   *size_MatF];
-   D27F.f[DIR_M00   ] = &DD27F[DIR_M00   *size_MatF];
-   D27F.f[DIR_0P0   ] = &DD27F[DIR_0P0   *size_MatF];
-   D27F.f[DIR_0M0   ] = &DD27F[DIR_0M0   *size_MatF];
-   D27F.f[DIR_00P   ] = &DD27F[DIR_00P   *size_MatF];
-   D27F.f[DIR_00M   ] = &DD27F[DIR_00M   *size_MatF];
-   D27F.f[DIR_PP0  ] = &DD27F[DIR_PP0  *size_MatF];
-   D27F.f[DIR_MM0  ] = &DD27F[DIR_MM0  *size_MatF];
-   D27F.f[DIR_PM0  ] = &DD27F[DIR_PM0  *size_MatF];
-   D27F.f[DIR_MP0  ] = &DD27F[DIR_MP0  *size_MatF];
-   D27F.f[DIR_P0P  ] = &DD27F[DIR_P0P  *size_MatF];
-   D27F.f[DIR_M0M  ] = &DD27F[DIR_M0M  *size_MatF];
-   D27F.f[DIR_P0M  ] = &DD27F[DIR_P0M  *size_MatF];
-   D27F.f[DIR_M0P  ] = &DD27F[DIR_M0P  *size_MatF];
-   D27F.f[DIR_0PP  ] = &DD27F[DIR_0PP  *size_MatF];
-   D27F.f[DIR_0MM  ] = &DD27F[DIR_0MM  *size_MatF];
-   D27F.f[DIR_0PM  ] = &DD27F[DIR_0PM  *size_MatF];
-   D27F.f[DIR_0MP  ] = &DD27F[DIR_0MP  *size_MatF];
-   D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF];
-   D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF];
-   D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF];
-   D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF];
-   D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF];
-   D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF];
-   D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF];
-   D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF];
-   D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF];
+   D27F.f[DIR_P00] = &DD27F[DIR_P00 * numberOfLBnodesFine];
+   D27F.f[DIR_M00] = &DD27F[DIR_M00 * numberOfLBnodesFine];
+   D27F.f[DIR_0P0] = &DD27F[DIR_0P0 * numberOfLBnodesFine];
+   D27F.f[DIR_0M0] = &DD27F[DIR_0M0 * numberOfLBnodesFine];
+   D27F.f[DIR_00P] = &DD27F[DIR_00P * numberOfLBnodesFine];
+   D27F.f[DIR_00M] = &DD27F[DIR_00M * numberOfLBnodesFine];
+   D27F.f[DIR_PP0] = &DD27F[DIR_PP0 * numberOfLBnodesFine];
+   D27F.f[DIR_MM0] = &DD27F[DIR_MM0 * numberOfLBnodesFine];
+   D27F.f[DIR_PM0] = &DD27F[DIR_PM0 * numberOfLBnodesFine];
+   D27F.f[DIR_MP0] = &DD27F[DIR_MP0 * numberOfLBnodesFine];
+   D27F.f[DIR_P0P] = &DD27F[DIR_P0P * numberOfLBnodesFine];
+   D27F.f[DIR_M0M] = &DD27F[DIR_M0M * numberOfLBnodesFine];
+   D27F.f[DIR_P0M] = &DD27F[DIR_P0M * numberOfLBnodesFine];
+   D27F.f[DIR_M0P] = &DD27F[DIR_M0P * numberOfLBnodesFine];
+   D27F.f[DIR_0PP] = &DD27F[DIR_0PP * numberOfLBnodesFine];
+   D27F.f[DIR_0MM] = &DD27F[DIR_0MM * numberOfLBnodesFine];
+   D27F.f[DIR_0PM] = &DD27F[DIR_0PM * numberOfLBnodesFine];
+   D27F.f[DIR_0MP] = &DD27F[DIR_0MP * numberOfLBnodesFine];
+   D27F.f[DIR_000] = &DD27F[DIR_000 * numberOfLBnodesFine];
+   D27F.f[DIR_PPP] = &DD27F[DIR_PPP * numberOfLBnodesFine];
+   D27F.f[DIR_MMP] = &DD27F[DIR_MMP * numberOfLBnodesFine];
+   D27F.f[DIR_PMP] = &DD27F[DIR_PMP * numberOfLBnodesFine];
+   D27F.f[DIR_MPP] = &DD27F[DIR_MPP * numberOfLBnodesFine];
+   D27F.f[DIR_PPM] = &DD27F[DIR_PPM * numberOfLBnodesFine];
+   D27F.f[DIR_MMM] = &DD27F[DIR_MMM * numberOfLBnodesFine];
+   D27F.f[DIR_PMM] = &DD27F[DIR_PMM * numberOfLBnodesFine];
+   D27F.f[DIR_MPM] = &DD27F[DIR_MPM * numberOfLBnodesFine];
 
    Distributions27 D27C;
    if (isEvenTimestep==true)
    {
-      D27C.f[DIR_P00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_M00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_P00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
    else
    {
-      D27C.f[DIR_M00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_P00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_M00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -45892,33 +45892,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -45979,33 +45979,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46066,33 +46066,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46153,33 +46153,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46250,33 +46250,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46337,33 +46337,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46424,33 +46424,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46511,33 +46511,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       ////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46656,32 +46656,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46734,32 +46734,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46812,32 +46812,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46890,32 +46890,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46978,32 +46978,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47056,32 +47056,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47134,32 +47134,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47212,32 +47212,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -47287,8 +47287,8 @@ __global__ void scaleCFEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-									             unsigned int size_MatC, 
-									             unsigned int size_MatF, 
+									             unsigned long long numberOfLBnodesCoarse, 
+									             unsigned long long numberOfLBnodesFine, 
 									             bool isEvenTimestep,
                                         unsigned int* posCSWB, 
                                         unsigned int* posFSWB, 
@@ -47305,96 +47305,96 @@ __global__ void scaleCFEff27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -48997,8 +48997,8 @@ __global__ void scaleCF27(real* DC,
                                      unsigned int* neighborFX,
                                      unsigned int* neighborFY,
                                      unsigned int* neighborFZ,
-                                     unsigned int size_MatC, 
-                                     unsigned int size_MatF, 
+                                     unsigned long long numberOfLBnodesCoarse, 
+                                     unsigned long long numberOfLBnodesFine, 
                                      bool isEvenTimestep,
                                      unsigned int* posCSWB, 
                                      unsigned int* posFSWB, 
@@ -49014,96 +49014,96 @@ __global__ void scaleCF27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu
index cb8bd2a322cc9176cd0aa31625ee386e1f62d63d..386493280fd71fff93c117483e754a248bb0830d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu
@@ -23,8 +23,8 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 													 unsigned int* neighborFX,
 													 unsigned int* neighborFY,
 													 unsigned int* neighborFZ,
-													 unsigned int size_MatC, 
-													 unsigned int size_MatF, 
+													 unsigned long long numberOfLBnodesCoarse, 
+													 unsigned long long numberOfLBnodesFine, 
 													 bool isEvenTimestep,
 													 unsigned int* posCSWB, 
 													 unsigned int* posFSWB, 
@@ -44,33 +44,33 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[DIR_P00   *size_MatF];
-	fM00dest = &DF[DIR_M00   *size_MatF];
-	f0P0dest = &DF[DIR_0P0   *size_MatF];
-	f0M0dest = &DF[DIR_0M0   *size_MatF];
-	f00Pdest = &DF[DIR_00P   *size_MatF];
-	f00Mdest = &DF[DIR_00M   *size_MatF];
-	fPP0dest = &DF[DIR_PP0  *size_MatF];
-	fMM0dest = &DF[DIR_MM0  *size_MatF];
-	fPM0dest = &DF[DIR_PM0  *size_MatF];
-	fMP0dest = &DF[DIR_MP0  *size_MatF];
-	fP0Pdest = &DF[DIR_P0P  *size_MatF];
-	fM0Mdest = &DF[DIR_M0M  *size_MatF];
-	fP0Mdest = &DF[DIR_P0M  *size_MatF];
-	fM0Pdest = &DF[DIR_M0P  *size_MatF];
-	f0PPdest = &DF[DIR_0PP  *size_MatF];
-	f0MMdest = &DF[DIR_0MM  *size_MatF];
-	f0PMdest = &DF[DIR_0PM  *size_MatF];
-	f0MPdest = &DF[DIR_0MP  *size_MatF];
-	f000dest = &DF[DIR_000*size_MatF];
-	fMMMdest = &DF[DIR_MMM *size_MatF];
-	fMMPdest = &DF[DIR_MMP *size_MatF];
-	fMPPdest = &DF[DIR_MPP *size_MatF];
-	fMPMdest = &DF[DIR_MPM *size_MatF];
-	fPPMdest = &DF[DIR_PPM *size_MatF];
-	fPPPdest = &DF[DIR_PPP *size_MatF];
-	fPMPdest = &DF[DIR_PMP *size_MatF];
-	fPMMdest = &DF[DIR_PMM *size_MatF];
+	fP00dest = &DF[DIR_P00 * numberOfLBnodesFine];
+	fM00dest = &DF[DIR_M00 * numberOfLBnodesFine];
+	f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine];
+	f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine];
+	f00Pdest = &DF[DIR_00P * numberOfLBnodesFine];
+	f00Mdest = &DF[DIR_00M * numberOfLBnodesFine];
+	fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine];
+	fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine];
+	fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine];
+	fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine];
+	fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine];
+	fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine];
+	fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine];
+	fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine];
+	f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine];
+	f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine];
+	f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine];
+	f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine];
+	f000dest = &DF[DIR_000 * numberOfLBnodesFine];
+	fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine];
+	fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine];
+	fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine];
+	fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine];
+	fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine];
+	fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine];
+	fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine];
+	fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -79,72 +79,72 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[DIR_P00   *size_MatC];
-		fM00source = &DC[DIR_M00   *size_MatC];
-		f0P0source = &DC[DIR_0P0   *size_MatC];
-		f0M0source = &DC[DIR_0M0   *size_MatC];
-		f00Psource = &DC[DIR_00P   *size_MatC];
-		f00Msource = &DC[DIR_00M   *size_MatC];
-		fPP0source = &DC[DIR_PP0  *size_MatC];
-		fMM0source = &DC[DIR_MM0  *size_MatC];
-		fPM0source = &DC[DIR_PM0  *size_MatC];
-		fMP0source = &DC[DIR_MP0  *size_MatC];
-		fP0Psource = &DC[DIR_P0P  *size_MatC];
-		fM0Msource = &DC[DIR_M0M  *size_MatC];
-		fP0Msource = &DC[DIR_P0M  *size_MatC];
-		fM0Psource = &DC[DIR_M0P  *size_MatC];
-		f0PPsource = &DC[DIR_0PP  *size_MatC];
-		f0MMsource = &DC[DIR_0MM  *size_MatC];
-		f0PMsource = &DC[DIR_0PM  *size_MatC];
-		f0MPsource = &DC[DIR_0MP  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_MMM *size_MatC];
-		fMMPsource = &DC[DIR_MMP *size_MatC];
-		fMPPsource = &DC[DIR_MPP *size_MatC];
-		fMPMsource = &DC[DIR_MPM *size_MatC];
-		fPPMsource = &DC[DIR_PPM *size_MatC];
-		fPPPsource = &DC[DIR_PPP *size_MatC];
-		fPMPsource = &DC[DIR_PMP *size_MatC];
-		fPMMsource = &DC[DIR_PMM *size_MatC];
+		fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
 	}
 	else
 	{
-		fP00source = &DC[DIR_M00   *size_MatC];
-		fM00source = &DC[DIR_P00   *size_MatC];
-		f0P0source = &DC[DIR_0M0   *size_MatC];
-		f0M0source = &DC[DIR_0P0   *size_MatC];
-		f00Psource = &DC[DIR_00M   *size_MatC];
-		f00Msource = &DC[DIR_00P   *size_MatC];
-		fPP0source = &DC[DIR_MM0  *size_MatC];
-		fMM0source = &DC[DIR_PP0  *size_MatC];
-		fPM0source = &DC[DIR_MP0  *size_MatC];
-		fMP0source = &DC[DIR_PM0  *size_MatC];
-		fP0Psource = &DC[DIR_M0M  *size_MatC];
-		fM0Msource = &DC[DIR_P0P  *size_MatC];
-		fP0Msource = &DC[DIR_M0P  *size_MatC];
-		fM0Psource = &DC[DIR_P0M  *size_MatC];
-		f0PPsource = &DC[DIR_0MM  *size_MatC];
-		f0MMsource = &DC[DIR_0PP  *size_MatC];
-		f0PMsource = &DC[DIR_0MP  *size_MatC];
-		f0MPsource = &DC[DIR_0PM  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_PPP *size_MatC];
-		fMMPsource = &DC[DIR_PPM *size_MatC];
-		fMPPsource = &DC[DIR_PMM *size_MatC];
-		fMPMsource = &DC[DIR_PMP *size_MatC];
-		fPPMsource = &DC[DIR_MMP *size_MatC];
-		fPPPsource = &DC[DIR_MMM *size_MatC];
-		fPMPsource = &DC[DIR_MPM *size_MatC];
-		fPMMsource = &DC[DIR_MPP *size_MatC];
+		fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
 	}
 
 	Distributions6 G;
-	G.g[DIR_P00] = &G6[DIR_P00   *size_MatF];
-	G.g[DIR_M00] = &G6[DIR_M00   *size_MatF];
-	G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatF];
-	G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatF];
-	G.g[DIR_00P] = &G6[DIR_00P   *size_MatF];
-	G.g[DIR_00M] = &G6[DIR_00M   *size_MatF];
+	G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesFine];
+	G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesFine];
+	G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesFine];
+	G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesFine];
+	G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesFine];
+	G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesFine];
 
 	////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -4370,8 +4370,8 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -4391,33 +4391,33 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[DIR_P00   *size_MatF];
-	fM00dest = &DF[DIR_M00   *size_MatF];
-	f0P0dest = &DF[DIR_0P0   *size_MatF];
-	f0M0dest = &DF[DIR_0M0   *size_MatF];
-	f00Pdest = &DF[DIR_00P   *size_MatF];
-	f00Mdest = &DF[DIR_00M   *size_MatF];
-	fPP0dest = &DF[DIR_PP0  *size_MatF];
-	fMM0dest = &DF[DIR_MM0  *size_MatF];
-	fPM0dest = &DF[DIR_PM0  *size_MatF];
-	fMP0dest = &DF[DIR_MP0  *size_MatF];
-	fP0Pdest = &DF[DIR_P0P  *size_MatF];
-	fM0Mdest = &DF[DIR_M0M  *size_MatF];
-	fP0Mdest = &DF[DIR_P0M  *size_MatF];
-	fM0Pdest = &DF[DIR_M0P  *size_MatF];
-	f0PPdest = &DF[DIR_0PP  *size_MatF];
-	f0MMdest = &DF[DIR_0MM  *size_MatF];
-	f0PMdest = &DF[DIR_0PM  *size_MatF];
-	f0MPdest = &DF[DIR_0MP  *size_MatF];
-	f000dest = &DF[DIR_000*size_MatF];
-	fMMMdest = &DF[DIR_MMM *size_MatF];
-	fMMPdest = &DF[DIR_MMP *size_MatF];
-	fMPPdest = &DF[DIR_MPP *size_MatF];
-	fMPMdest = &DF[DIR_MPM *size_MatF];
-	fPPMdest = &DF[DIR_PPM *size_MatF];
-	fPPPdest = &DF[DIR_PPP *size_MatF];
-	fPMPdest = &DF[DIR_PMP *size_MatF];
-	fPMMdest = &DF[DIR_PMM *size_MatF];
+	fP00dest = &DF[DIR_P00 * numberOfLBnodesFine];
+	fM00dest = &DF[DIR_M00 * numberOfLBnodesFine];
+	f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine];
+	f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine];
+	f00Pdest = &DF[DIR_00P * numberOfLBnodesFine];
+	f00Mdest = &DF[DIR_00M * numberOfLBnodesFine];
+	fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine];
+	fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine];
+	fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine];
+	fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine];
+	fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine];
+	fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine];
+	fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine];
+	fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine];
+	f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine];
+	f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine];
+	f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine];
+	f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine];
+	f000dest = &DF[DIR_000 * numberOfLBnodesFine];
+	fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine];
+	fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine];
+	fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine];
+	fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine];
+	fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine];
+	fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine];
+	fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine];
+	fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -4426,72 +4426,72 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[DIR_P00   *size_MatC];
-		fM00source = &DC[DIR_M00   *size_MatC];
-		f0P0source = &DC[DIR_0P0   *size_MatC];
-		f0M0source = &DC[DIR_0M0   *size_MatC];
-		f00Psource = &DC[DIR_00P   *size_MatC];
-		f00Msource = &DC[DIR_00M   *size_MatC];
-		fPP0source = &DC[DIR_PP0  *size_MatC];
-		fMM0source = &DC[DIR_MM0  *size_MatC];
-		fPM0source = &DC[DIR_PM0  *size_MatC];
-		fMP0source = &DC[DIR_MP0  *size_MatC];
-		fP0Psource = &DC[DIR_P0P  *size_MatC];
-		fM0Msource = &DC[DIR_M0M  *size_MatC];
-		fP0Msource = &DC[DIR_P0M  *size_MatC];
-		fM0Psource = &DC[DIR_M0P  *size_MatC];
-		f0PPsource = &DC[DIR_0PP  *size_MatC];
-		f0MMsource = &DC[DIR_0MM  *size_MatC];
-		f0PMsource = &DC[DIR_0PM  *size_MatC];
-		f0MPsource = &DC[DIR_0MP  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_MMM *size_MatC];
-		fMMPsource = &DC[DIR_MMP *size_MatC];
-		fMPPsource = &DC[DIR_MPP *size_MatC];
-		fMPMsource = &DC[DIR_MPM *size_MatC];
-		fPPMsource = &DC[DIR_PPM *size_MatC];
-		fPPPsource = &DC[DIR_PPP *size_MatC];
-		fPMPsource = &DC[DIR_PMP *size_MatC];
-		fPMMsource = &DC[DIR_PMM *size_MatC];
+		fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
 	}
 	else
 	{
-		fP00source = &DC[DIR_M00   *size_MatC];
-		fM00source = &DC[DIR_P00   *size_MatC];
-		f0P0source = &DC[DIR_0M0   *size_MatC];
-		f0M0source = &DC[DIR_0P0   *size_MatC];
-		f00Psource = &DC[DIR_00M   *size_MatC];
-		f00Msource = &DC[DIR_00P   *size_MatC];
-		fPP0source = &DC[DIR_MM0  *size_MatC];
-		fMM0source = &DC[DIR_PP0  *size_MatC];
-		fPM0source = &DC[DIR_MP0  *size_MatC];
-		fMP0source = &DC[DIR_PM0  *size_MatC];
-		fP0Psource = &DC[DIR_M0M  *size_MatC];
-		fM0Msource = &DC[DIR_P0P  *size_MatC];
-		fP0Msource = &DC[DIR_M0P  *size_MatC];
-		fM0Psource = &DC[DIR_P0M  *size_MatC];
-		f0PPsource = &DC[DIR_0MM  *size_MatC];
-		f0MMsource = &DC[DIR_0PP  *size_MatC];
-		f0PMsource = &DC[DIR_0MP  *size_MatC];
-		f0MPsource = &DC[DIR_0PM  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_PPP *size_MatC];
-		fMMPsource = &DC[DIR_PPM *size_MatC];
-		fMPPsource = &DC[DIR_PMM *size_MatC];
-		fMPMsource = &DC[DIR_PMP *size_MatC];
-		fPPMsource = &DC[DIR_MMP *size_MatC];
-		fPPPsource = &DC[DIR_MMM *size_MatC];
-		fPMPsource = &DC[DIR_MPM *size_MatC];
-		fPMMsource = &DC[DIR_MPP *size_MatC];
+		fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
 	}
 
 	Distributions6 G;
-	G.g[DIR_P00] = &G6[DIR_P00   *size_MatF];
-	G.g[DIR_M00] = &G6[DIR_M00   *size_MatF];
-	G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatF];
-	G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatF];
-	G.g[DIR_00P] = &G6[DIR_00P   *size_MatF];
-	G.g[DIR_00M] = &G6[DIR_00M   *size_MatF];
+	G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesFine];
+	G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesFine];
+	G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesFine];
+	G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesFine];
+	G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesFine];
+	G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesFine];
 
 	////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu
index f2a66876cf39e3519e22fc2b0e236514f05ce85a..b37ab44d81d15fbbde46c875c860acd7198b8041 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu
@@ -22,8 +22,8 @@ __global__ void scaleFC_0817_comp_27( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -43,33 +43,33 @@ __global__ void scaleFC_0817_comp_27( real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[DIR_P00   *size_MatF];
-   fM00source = &DF[DIR_M00   *size_MatF];
-   f0P0source = &DF[DIR_0P0   *size_MatF];
-   f0M0source = &DF[DIR_0M0   *size_MatF];
-   f00Psource = &DF[DIR_00P   *size_MatF];
-   f00Msource = &DF[DIR_00M   *size_MatF];
-   fPP0source = &DF[DIR_PP0  *size_MatF];
-   fMM0source = &DF[DIR_MM0  *size_MatF];
-   fPM0source = &DF[DIR_PM0  *size_MatF];
-   fMP0source = &DF[DIR_MP0  *size_MatF];
-   fP0Psource = &DF[DIR_P0P  *size_MatF];
-   fM0Msource = &DF[DIR_M0M  *size_MatF];
-   fP0Msource = &DF[DIR_P0M  *size_MatF];
-   fM0Psource = &DF[DIR_M0P  *size_MatF];
-   f0PPsource = &DF[DIR_0PP  *size_MatF];
-   f0MMsource = &DF[DIR_0MM  *size_MatF];
-   f0PMsource = &DF[DIR_0PM  *size_MatF];
-   f0MPsource = &DF[DIR_0MP  *size_MatF];
-   f000source = &DF[DIR_000*size_MatF];
-   fMMMsource = &DF[DIR_MMM *size_MatF];
-   fMMPsource = &DF[DIR_MMP *size_MatF];
-   fMPPsource = &DF[DIR_MPP *size_MatF];
-   fMPMsource = &DF[DIR_MPM *size_MatF];
-   fPPMsource = &DF[DIR_PPM *size_MatF];
-   fPPPsource = &DF[DIR_PPP *size_MatF];
-   fPMPsource = &DF[DIR_PMP *size_MatF];
-   fPMMsource = &DF[DIR_PMM *size_MatF];
+   fP00source = &DF[DIR_P00 * numberOfLBnodesFine];
+   fM00source = &DF[DIR_M00 * numberOfLBnodesFine];
+   f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine];
+   f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine];
+   f00Psource = &DF[DIR_00P * numberOfLBnodesFine];
+   f00Msource = &DF[DIR_00M * numberOfLBnodesFine];
+   fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine];
+   fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine];
+   fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine];
+   fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine];
+   f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine];
+   f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine];
+   f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine];
+   f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine];
+   f000source = &DF[DIR_000 * numberOfLBnodesFine];
+   fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine];
+   fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine];
+   fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine];
+   fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine];
+   fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine];
+   fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine];
+   fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine];
+   fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -78,63 +78,63 @@ __global__ void scaleFC_0817_comp_27( real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[DIR_P00   *size_MatC];
-	   fM00dest = &DC[DIR_M00   *size_MatC];
-	   f0P0dest = &DC[DIR_0P0   *size_MatC];
-	   f0M0dest = &DC[DIR_0M0   *size_MatC];
-	   f00Pdest = &DC[DIR_00P   *size_MatC];
-	   f00Mdest = &DC[DIR_00M   *size_MatC];
-	   fPP0dest = &DC[DIR_PP0  *size_MatC];
-	   fMM0dest = &DC[DIR_MM0  *size_MatC];
-	   fPM0dest = &DC[DIR_PM0  *size_MatC];
-	   fMP0dest = &DC[DIR_MP0  *size_MatC];
-	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
-	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
-	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
-	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
-	   f0PPdest = &DC[DIR_0PP  *size_MatC];
-	   f0MMdest = &DC[DIR_0MM  *size_MatC];
-	   f0PMdest = &DC[DIR_0PM  *size_MatC];
-	   f0MPdest = &DC[DIR_0MP  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_MMM *size_MatC];
-	   fMMPdest = &DC[DIR_MMP *size_MatC];
-	   fMPPdest = &DC[DIR_MPP *size_MatC];
-	   fMPMdest = &DC[DIR_MPM *size_MatC];
-	   fPPMdest = &DC[DIR_PPM *size_MatC];
-	   fPPPdest = &DC[DIR_PPP *size_MatC];
-	   fPMPdest = &DC[DIR_PMP *size_MatC];
-	   fPMMdest = &DC[DIR_PMM *size_MatC];
+	   fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
    } 
    else
    {
-	   fP00dest = &DC[DIR_M00   *size_MatC];
-	   fM00dest = &DC[DIR_P00   *size_MatC];
-	   f0P0dest = &DC[DIR_0M0   *size_MatC];
-	   f0M0dest = &DC[DIR_0P0   *size_MatC];
-	   f00Pdest = &DC[DIR_00M   *size_MatC];
-	   f00Mdest = &DC[DIR_00P   *size_MatC];
-	   fPP0dest = &DC[DIR_MM0  *size_MatC];
-	   fMM0dest = &DC[DIR_PP0  *size_MatC];
-	   fPM0dest = &DC[DIR_MP0  *size_MatC];
-	   fMP0dest = &DC[DIR_PM0  *size_MatC];
-	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
-	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
-	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
-	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
-	   f0PPdest = &DC[DIR_0MM  *size_MatC];
-	   f0MMdest = &DC[DIR_0PP  *size_MatC];
-	   f0PMdest = &DC[DIR_0MP  *size_MatC];
-	   f0MPdest = &DC[DIR_0PM  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_PPP *size_MatC];
-	   fMMPdest = &DC[DIR_PPM *size_MatC];
-	   fMPPdest = &DC[DIR_PMM *size_MatC];
-	   fMPMdest = &DC[DIR_PMP *size_MatC];
-	   fPPMdest = &DC[DIR_MMP *size_MatC];
-	   fPPPdest = &DC[DIR_MMM *size_MatC];
-	   fPMPdest = &DC[DIR_MPM *size_MatC];
-	   fPMMdest = &DC[DIR_MPP *size_MatC];
+	   fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -1218,8 +1218,8 @@ __global__ void scaleFC_AA2016_comp_27(real* DC,
 												  unsigned int* neighborFX,
 												  unsigned int* neighborFY,
 												  unsigned int* neighborFZ,
-												  unsigned int size_MatC, 
-												  unsigned int size_MatF, 
+												  unsigned long long numberOfLBnodesCoarse, 
+												  unsigned long long numberOfLBnodesFine, 
 												  bool isEvenTimestep,
 												  unsigned int* posC, 
 												  unsigned int* posFSWB, 
@@ -1236,96 +1236,96 @@ __global__ void scaleFC_AA2016_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -5407,8 +5407,8 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int* neighborFX,
 														unsigned int* neighborFY,
 														unsigned int* neighborFZ,
-														unsigned int size_MatC, 
-														unsigned int size_MatF, 
+														unsigned long long numberOfLBnodesCoarse, 
+														unsigned long long numberOfLBnodesFine, 
 														bool isEvenTimestep,
 														unsigned int* posC, 
 														unsigned int* posFSWB, 
@@ -5425,96 +5425,96 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -9587,103 +9587,120 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-__device__ void scaleFC_RhoSq_comp_27_Calculation(real *DC, real *DF, unsigned int *neighborCX, unsigned int *neighborCY,
-                                                  unsigned int *neighborCZ, unsigned int *neighborFX, unsigned int *neighborFY,
-                                                  unsigned int *neighborFZ, unsigned int size_MatC, unsigned int size_MatF,
-                                                  bool isEvenTimestep, unsigned int *posC, unsigned int *posFSWB, unsigned int kFC,
-                                                  real omCoarse, real omFine, real nu, unsigned int nxC, unsigned int nyC,
-                                                  unsigned int nxF, unsigned int nyF, OffFC offFC, const unsigned k)
+__device__ void scaleFC_RhoSq_comp_27_Calculation(
+    real *DC, real *DF, 
+    unsigned int *neighborCX,
+    unsigned int *neighborCY,
+    unsigned int *neighborCZ,
+    unsigned int *neighborFX,
+    unsigned int *neighborFY,
+    unsigned int *neighborFZ,
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
+    bool isEvenTimestep,
+    unsigned int *posC,
+    unsigned int *posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    OffFC offFC,
+    const unsigned k)
 {
     real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF,
         *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-    feF    = &DF[DIR_P00 * size_MatF];
-    fwF    = &DF[DIR_M00 * size_MatF];
-    fnF    = &DF[DIR_0P0 * size_MatF];
-    fsF    = &DF[DIR_0M0 * size_MatF];
-    ftF    = &DF[DIR_00P * size_MatF];
-    fbF    = &DF[DIR_00M * size_MatF];
-    fneF   = &DF[DIR_PP0 * size_MatF];
-    fswF   = &DF[DIR_MM0 * size_MatF];
-    fseF   = &DF[DIR_PM0 * size_MatF];
-    fnwF   = &DF[DIR_MP0 * size_MatF];
-    fteF   = &DF[DIR_P0P * size_MatF];
-    fbwF   = &DF[DIR_M0M * size_MatF];
-    fbeF   = &DF[DIR_P0M * size_MatF];
-    ftwF   = &DF[DIR_M0P * size_MatF];
-    ftnF   = &DF[DIR_0PP * size_MatF];
-    fbsF   = &DF[DIR_0MM * size_MatF];
-    fbnF   = &DF[DIR_0PM * size_MatF];
-    ftsF   = &DF[DIR_0MP * size_MatF];
-    fzeroF = &DF[DIR_000 * size_MatF];
-    ftneF  = &DF[DIR_PPP * size_MatF];
-    ftswF  = &DF[DIR_MMP * size_MatF];
-    ftseF  = &DF[DIR_PMP * size_MatF];
-    ftnwF  = &DF[DIR_MPP * size_MatF];
-    fbneF  = &DF[DIR_PPM * size_MatF];
-    fbswF  = &DF[DIR_MMM * size_MatF];
-    fbseF  = &DF[DIR_PMM * size_MatF];
-    fbnwF  = &DF[DIR_MPM * size_MatF];
+    feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+    fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+    fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+    fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+    ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+    fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+    fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+    fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+    fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+    fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+    fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+    fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+    fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+    ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+    ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+    fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+    fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+    ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+    fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+    ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+    ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+    ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+    ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+    fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+    fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+    fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+    fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
     real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC,
         *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
     if (isEvenTimestep == true) {
-        feC    = &DC[DIR_P00 * size_MatC];
-        fwC    = &DC[DIR_M00 * size_MatC];
-        fnC    = &DC[DIR_0P0 * size_MatC];
-        fsC    = &DC[DIR_0M0 * size_MatC];
-        ftC    = &DC[DIR_00P * size_MatC];
-        fbC    = &DC[DIR_00M * size_MatC];
-        fneC   = &DC[DIR_PP0 * size_MatC];
-        fswC   = &DC[DIR_MM0 * size_MatC];
-        fseC   = &DC[DIR_PM0 * size_MatC];
-        fnwC   = &DC[DIR_MP0 * size_MatC];
-        fteC   = &DC[DIR_P0P * size_MatC];
-        fbwC   = &DC[DIR_M0M * size_MatC];
-        fbeC   = &DC[DIR_P0M * size_MatC];
-        ftwC   = &DC[DIR_M0P * size_MatC];
-        ftnC   = &DC[DIR_0PP * size_MatC];
-        fbsC   = &DC[DIR_0MM * size_MatC];
-        fbnC   = &DC[DIR_0PM * size_MatC];
-        ftsC   = &DC[DIR_0MP * size_MatC];
-        fzeroC = &DC[DIR_000 * size_MatC];
-        ftneC  = &DC[DIR_PPP * size_MatC];
-        ftswC  = &DC[DIR_MMP * size_MatC];
-        ftseC  = &DC[DIR_PMP * size_MatC];
-        ftnwC  = &DC[DIR_MPP * size_MatC];
-        fbneC  = &DC[DIR_PPM * size_MatC];
-        fbswC  = &DC[DIR_MMM * size_MatC];
-        fbseC  = &DC[DIR_PMM * size_MatC];
-        fbnwC  = &DC[DIR_MPM * size_MatC];
+        feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+        fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+        fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+        fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+        ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+        fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+        fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+        fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+        fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+        fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+        fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+        fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+        fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+        ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+        ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+        fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+        fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+        ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+        fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+        ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+        ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+        ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+        ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+        fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+        fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+        fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+        fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
     } else {
-        fwC    = &DC[DIR_P00 * size_MatC];
-        feC    = &DC[DIR_M00 * size_MatC];
-        fsC    = &DC[DIR_0P0 * size_MatC];
-        fnC    = &DC[DIR_0M0 * size_MatC];
-        fbC    = &DC[DIR_00P * size_MatC];
-        ftC    = &DC[DIR_00M * size_MatC];
-        fswC   = &DC[DIR_PP0 * size_MatC];
-        fneC   = &DC[DIR_MM0 * size_MatC];
-        fnwC   = &DC[DIR_PM0 * size_MatC];
-        fseC   = &DC[DIR_MP0 * size_MatC];
-        fbwC   = &DC[DIR_P0P * size_MatC];
-        fteC   = &DC[DIR_M0M * size_MatC];
-        ftwC   = &DC[DIR_P0M * size_MatC];
-        fbeC   = &DC[DIR_M0P * size_MatC];
-        fbsC   = &DC[DIR_0PP * size_MatC];
-        ftnC   = &DC[DIR_0MM * size_MatC];
-        ftsC   = &DC[DIR_0PM * size_MatC];
-        fbnC   = &DC[DIR_0MP * size_MatC];
-        fzeroC = &DC[DIR_000 * size_MatC];
-        fbswC  = &DC[DIR_PPP * size_MatC];
-        fbneC  = &DC[DIR_MMP * size_MatC];
-        fbnwC  = &DC[DIR_PMP * size_MatC];
-        fbseC  = &DC[DIR_MPP * size_MatC];
-        ftswC  = &DC[DIR_PPM * size_MatC];
-        ftneC  = &DC[DIR_MMM * size_MatC];
-        ftnwC  = &DC[DIR_PMM * size_MatC];
-        ftseC  = &DC[DIR_MPM * size_MatC];
+        fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+        feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+        fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+        fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+        fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+        ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+        fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+        fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+        fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+        fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+        fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+        fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+        ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+        fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+        fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+        ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+        ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+        fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+        fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+        fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+        fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+        fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+        fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+        ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+        ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+        ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+        ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
     }
 
     ////////////////////////////////////////////////////////////////////////////////
@@ -11064,8 +11081,8 @@ __global__ void scaleFC_RhoSq_comp_27(real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -11091,7 +11108,7 @@ __global__ void scaleFC_RhoSq_comp_27(real* DC,
    //////////////////////////////////////////////////////////////////////////
 
    scaleFC_RhoSq_comp_27_Calculation(DC, DF, neighborCX, neighborCY, neighborCZ, neighborFX, neighborFY, neighborFZ,
-                                     size_MatC, size_MatF, isEvenTimestep, posC, posFSWB, kFC, omCoarse, omFine, nu, nxC,
+                                     numberOfLBnodesCoarse, numberOfLBnodesFine, isEvenTimestep, posC, posFSWB, kFC, omCoarse, omFine, nu, nxC,
                                      nyC, nxF, nyF, offFC, k);
 }
 
@@ -11157,8 +11174,8 @@ __global__ void scaleFC_staggered_time_comp_27(   real* DC,
 															 unsigned int* neighborFX,
 															 unsigned int* neighborFY,
 															 unsigned int* neighborFZ,
-															 unsigned int size_MatC, 
-															 unsigned int size_MatF, 
+															 unsigned long long numberOfLBnodesCoarse, 
+															 unsigned long long numberOfLBnodesFine, 
 															 bool isEvenTimestep,
 															 unsigned int* posC, 
 															 unsigned int* posFSWB, 
@@ -11175,96 +11192,96 @@ __global__ void scaleFC_staggered_time_comp_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -11755,827 +11772,6 @@ __global__ void scaleFC_staggered_time_comp_27(   real* DC,
       kxxMyyFromfcNEQ_NEB = -c3o2*omegaS *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (c1o1 + drho_NEB) - ((vx1_NEB*vx1_NEB-vx2_NEB*vx2_NEB)));
       kxxMzzFromfcNEQ_NEB = -c3o2*omegaS *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (c1o1 + drho_NEB) - ((vx1_NEB*vx1_NEB-vx3_NEB*vx3_NEB)));
 
-   //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  ////pointertausch
-	  // if (isEvenTimestep==false)
-	  // {
-		 // feC    = &DC[DIR_P00   *size_MatC];
-		 // fwC    = &DC[DIR_M00   *size_MatC];
-		 // fnC    = &DC[DIR_0P0   *size_MatC];
-		 // fsC    = &DC[DIR_0M0   *size_MatC];
-		 // ftC    = &DC[DIR_00P   *size_MatC];
-		 // fbC    = &DC[DIR_00M   *size_MatC];
-		 // fneC   = &DC[DIR_PP0  *size_MatC];
-		 // fswC   = &DC[DIR_MM0  *size_MatC];
-		 // fseC   = &DC[DIR_PM0  *size_MatC];
-		 // fnwC   = &DC[DIR_MP0  *size_MatC];
-		 // fteC   = &DC[DIR_P0P  *size_MatC];
-		 // fbwC   = &DC[DIR_M0M  *size_MatC];
-		 // fbeC   = &DC[DIR_P0M  *size_MatC];
-		 // ftwC   = &DC[DIR_M0P  *size_MatC];
-		 // ftnC   = &DC[DIR_0PP  *size_MatC];
-		 // fbsC   = &DC[DIR_0MM  *size_MatC];
-		 // fbnC   = &DC[DIR_0PM  *size_MatC];
-		 // ftsC   = &DC[DIR_0MP  *size_MatC];
-		 // fzeroC = &DC[DIR_000*size_MatC];
-		 // ftneC  = &DC[DIR_PPP *size_MatC];
-		 // ftswC  = &DC[DIR_MMP *size_MatC];
-		 // ftseC  = &DC[DIR_PMP *size_MatC];
-		 // ftnwC  = &DC[DIR_MPP *size_MatC];
-		 // fbneC  = &DC[DIR_PPM *size_MatC];
-		 // fbswC  = &DC[DIR_MMM *size_MatC];
-		 // fbseC  = &DC[DIR_PMM *size_MatC];
-		 // fbnwC  = &DC[DIR_MPM *size_MatC];
-	  // } 
-	  // else
-	  // {
-		 // fwC    = &DC[DIR_P00   *size_MatC];
-		 // feC    = &DC[DIR_M00   *size_MatC];
-		 // fsC    = &DC[DIR_0P0   *size_MatC];
-		 // fnC    = &DC[DIR_0M0   *size_MatC];
-		 // fbC    = &DC[DIR_00P   *size_MatC];
-		 // ftC    = &DC[DIR_00M   *size_MatC];
-		 // fswC   = &DC[DIR_PP0  *size_MatC];
-		 // fneC   = &DC[DIR_MM0  *size_MatC];
-		 // fnwC   = &DC[DIR_PM0  *size_MatC];
-		 // fseC   = &DC[DIR_MP0  *size_MatC];
-		 // fbwC   = &DC[DIR_P0P  *size_MatC];
-		 // fteC   = &DC[DIR_M0M  *size_MatC];
-		 // ftwC   = &DC[DIR_P0M  *size_MatC];
-		 // fbeC   = &DC[DIR_M0P  *size_MatC];
-		 // fbsC   = &DC[DIR_0PP  *size_MatC];
-		 // ftnC   = &DC[DIR_0MM  *size_MatC];
-		 // ftsC   = &DC[DIR_0PM  *size_MatC];
-		 // fbnC   = &DC[DIR_0MP  *size_MatC];
-		 // fzeroC = &DC[DIR_000*size_MatC];
-		 // fbswC  = &DC[DIR_PPP *size_MatC];
-		 // fbneC  = &DC[DIR_MMP *size_MatC];
-		 // fbnwC  = &DC[DIR_PMP *size_MatC];
-		 // fbseC  = &DC[DIR_MPP *size_MatC];
-		 // ftswC  = &DC[DIR_PPM *size_MatC];
-		 // ftneC  = &DC[DIR_MMM *size_MatC];
-		 // ftnwC  = &DC[DIR_PMM *size_MatC];
-		 // ftseC  = &DC[DIR_MPM *size_MatC];
-	  // }
-
- 	 // real rho_tmp;
-	  //real vx1_tmp;
-	  //real vx2_tmp;
-	  //real vx3_tmp;
-
-   //  //////////////////////////////////////////////////////////////////////////
-   //   xoff = offFC.xOffFC[k];
-   //   yoff = offFC.yOffFC[k];
-   //   zoff = offFC.zOffFC[k];      
-   //   xoff_sq = xoff * xoff;
-   //   yoff_sq = yoff * yoff;
-   //   zoff_sq = zoff * zoff;
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SWB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 0
-   //   k0zero= posFSWB[k];
-   //   k0w   = neighborFX[k0zero];
-   //   k0s   = neighborFY[k0zero];
-   //   k0b   = neighborFZ[k0zero];
-   //   k0sw  = neighborFY[k0w];
-   //   k0bw  = neighborFZ[k0w];
-   //   k0bs  = neighborFZ[k0s];
-   //   k0bsw = neighborFZ[k0sw];
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= k0zero;
-   //   kw   = k0w;   
-   //   ks   = k0s;   
-   //   kb   = k0b;   
-   //   ksw  = k0sw;  
-   //   kbw  = k0bw;  
-   //   kbs  = k0bs;  
-   //   kbsw = k0bsw; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SWB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SWB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SWB);
-	  ////vx2_SWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SWB);
-	  ////vx3_SWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SWB);
-
-   //   //kxyFromfcNEQ_SWB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SWB) - ((vx1_SWB*vx2_SWB)));
-   //   //kyzFromfcNEQ_SWB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SWB) - ((vx2_SWB*vx3_SWB)));
-   //   //kxzFromfcNEQ_SWB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SWB) - ((vx1_SWB*vx3_SWB)));
-   //   //kxxMyyFromfcNEQ_SWB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SWB) - ((vx1_SWB*vx1_SWB-vx2_SWB*vx2_SWB)));
-   //   //kxxMzzFromfcNEQ_SWB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SWB) - ((vx1_SWB*vx1_SWB-vx3_SWB*vx3_SWB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SWB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SWB  += vx1_tmp;
-	  //vx2_SWB  += vx2_tmp;
-	  //vx3_SWB  += vx3_tmp;
-
-   //   drho_SWB *= c1o2;
-   //   vx1_SWB  *= c1o2;
-	  //vx2_SWB  *= c1o2;
-	  //vx3_SWB  *= c1o2;
-
-   //   kxyFromfcNEQ_SWB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SWB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SWB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SWB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SWB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SWB    *= c1o2;
-	  //kyzFromfcNEQ_SWB    *= c1o2;
-	  //kxzFromfcNEQ_SWB    *= c1o2;
-	  //kxxMyyFromfcNEQ_SWB *= c1o2;
-	  //kxxMzzFromfcNEQ_SWB *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SWT//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kb;
-   //   kw   = kbw;   
-   //   ks   = kbs;   
-   //   kb   = neighborFZ[kb];   
-   //   ksw  = kbsw;  
-   //   kbw  = neighborFZ[kbw];  
-   //   kbs  = neighborFZ[kbs];  
-   //   kbsw = neighborFZ[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SWT = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SWT  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SWT);
-	  ////vx2_SWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SWT);
-	  ////vx3_SWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SWT);
-
-   //   //kxyFromfcNEQ_SWT    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SWT) - ((vx1_SWT*vx2_SWT)));
-   //   //kyzFromfcNEQ_SWT    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SWT) - ((vx2_SWT*vx3_SWT)));
-   //   //kxzFromfcNEQ_SWT    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SWT) - ((vx1_SWT*vx3_SWT)));
-   //   //kxxMyyFromfcNEQ_SWT = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SWT) - ((vx1_SWT*vx1_SWT-vx2_SWT*vx2_SWT)));
-   //   //kxxMzzFromfcNEQ_SWT = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SWT) - ((vx1_SWT*vx1_SWT-vx3_SWT*vx3_SWT)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SWT += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SWT  += vx1_tmp;
-	  //vx2_SWT  += vx2_tmp;
-	  //vx3_SWT  += vx3_tmp;
-
-   //   drho_SWT *= c1o2;
-   //   vx1_SWT  *= c1o2;
-	  //vx2_SWT  *= c1o2;
-	  //vx3_SWT  *= c1o2;
-
-   //   kxyFromfcNEQ_SWT    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SWT    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SWT    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SWT += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SWT += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SWT    *= c1o2;
-	  //kyzFromfcNEQ_SWT    *= c1o2;
-	  //kxzFromfcNEQ_SWT    *= c1o2;
-	  //kxxMyyFromfcNEQ_SWT *= c1o2;
-	  //kxxMzzFromfcNEQ_SWT *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SET//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kw;
-   //   kw   = neighborFX[kw];   
-   //   ks   = ksw;   
-   //   kb   = kbw;   
-   //   ksw  = neighborFX[ksw];  
-   //   kbw  = neighborFX[kbw];  
-   //   kbs  = kbsw;  
-   //   kbsw = neighborFX[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SET = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SET  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SET);
-	  ////vx2_SET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SET);
-	  ////vx3_SET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SET);
-
-   //   //kxyFromfcNEQ_SET    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SET) - ((vx1_SET*vx2_SET)));
-   //   //kyzFromfcNEQ_SET    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SET) - ((vx2_SET*vx3_SET)));
-   //   //kxzFromfcNEQ_SET    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SET) - ((vx1_SET*vx3_SET)));
-   //   //kxxMyyFromfcNEQ_SET = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SET) - ((vx1_SET*vx1_SET-vx2_SET*vx2_SET)));
-   //   //kxxMzzFromfcNEQ_SET = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SET) - ((vx1_SET*vx1_SET-vx3_SET*vx3_SET)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SET += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SET  += vx1_tmp;
-	  //vx2_SET  += vx2_tmp;
-	  //vx3_SET  += vx3_tmp;
-
-   //   drho_SET *= c1o2;
-   //   vx1_SET  *= c1o2;
-	  //vx2_SET  *= c1o2;
-	  //vx3_SET  *= c1o2;
-
-   //   kxyFromfcNEQ_SET    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SET    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SET    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SET += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SET += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SET    *= c1o2;
-	  //kyzFromfcNEQ_SET    *= c1o2;
-	  //kxzFromfcNEQ_SET    *= c1o2;
-	  //kxxMyyFromfcNEQ_SET *= c1o2;
-	  //kxxMzzFromfcNEQ_SET *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SEB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kb   = kzero;   
-   //   kbw  = kw;  
-   //   kbs  = ks;  
-   //   kbsw = ksw; 
-   //   kzero= k0w;
-   //   kw   = neighborFX[k0w];   
-   //   ks   = k0sw;   
-   //   ksw  = neighborFX[k0sw];  
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SEB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SEB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SEB);
-	  ////vx2_SEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SEB);
-	  ////vx3_SEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SEB);
-
-   //   //kxyFromfcNEQ_SEB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SEB) - ((vx1_SEB*vx2_SEB)));
-   //   //kyzFromfcNEQ_SEB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SEB) - ((vx2_SEB*vx3_SEB)));
-   //   //kxzFromfcNEQ_SEB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SEB) - ((vx1_SEB*vx3_SEB)));
-   //   //kxxMyyFromfcNEQ_SEB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SEB) - ((vx1_SEB*vx1_SEB-vx2_SEB*vx2_SEB)));
-   //   //kxxMzzFromfcNEQ_SEB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SEB) - ((vx1_SEB*vx1_SEB-vx3_SEB*vx3_SEB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SEB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SEB  += vx1_tmp;
-	  //vx2_SEB  += vx2_tmp;
-	  //vx3_SEB  += vx3_tmp;
-
-   //   drho_SEB *= c1o2;
-   //   vx1_SEB  *= c1o2;
-	  //vx2_SEB  *= c1o2;
-	  //vx3_SEB  *= c1o2;
-
-   //   kxyFromfcNEQ_SEB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SEB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SEB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SEB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SEB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SEB    *= c1o2;
-	  //kyzFromfcNEQ_SEB    *= c1o2;
-	  //kxzFromfcNEQ_SEB    *= c1o2;
-	  //kxxMyyFromfcNEQ_SEB *= c1o2;
-	  //kxxMzzFromfcNEQ_SEB *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NWB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 0
-   //   k0zero= k0s;
-   //   k0w   = k0sw;
-   //   k0s   = neighborFY[k0s];
-   //   k0b   = k0bs;
-   //   k0sw  = neighborFY[k0sw];
-   //   k0bw  = k0bsw;
-   //   k0bs  = neighborFY[k0bs];
-   //   k0bsw = neighborFY[k0bsw];
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= k0zero;
-   //   kw   = k0w;   
-   //   ks   = k0s;   
-   //   kb   = k0b;   
-   //   ksw  = k0sw;  
-   //   kbw  = k0bw;  
-   //   kbs  = k0bs;  
-   //   kbsw = k0bsw; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NWB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NWB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NWB);
-	  ////vx2_NWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NWB);
-	  ////vx3_NWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NWB);
-
-   //   //kxyFromfcNEQ_NWB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NWB) - ((vx1_NWB*vx2_NWB)));
-   //   //kyzFromfcNEQ_NWB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NWB) - ((vx2_NWB*vx3_NWB)));
-   //   //kxzFromfcNEQ_NWB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NWB) - ((vx1_NWB*vx3_NWB)));
-   //   //kxxMyyFromfcNEQ_NWB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NWB) - ((vx1_NWB*vx1_NWB-vx2_NWB*vx2_NWB)));
-   //   //kxxMzzFromfcNEQ_NWB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NWB) - ((vx1_NWB*vx1_NWB-vx3_NWB*vx3_NWB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NWB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NWB  += vx1_tmp;
-	  //vx2_NWB  += vx2_tmp;
-	  //vx3_NWB  += vx3_tmp;
-
-   //   drho_NWB *= c1o2;
-   //   vx1_NWB  *= c1o2;
-	  //vx2_NWB  *= c1o2;
-	  //vx3_NWB  *= c1o2;
-
-   //   kxyFromfcNEQ_NWB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NWB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NWB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NWB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NWB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NWB    *= c1o2;
-	  //kyzFromfcNEQ_NWB    *= c1o2;
-	  //kxzFromfcNEQ_NWB    *= c1o2;
-	  //kxxMyyFromfcNEQ_NWB *= c1o2;
-	  //kxxMzzFromfcNEQ_NWB *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NWT//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kb;
-   //   kw   = kbw;   
-   //   ks   = kbs;   
-   //   kb   = neighborFZ[kb];   
-   //   ksw  = kbsw;  
-   //   kbw  = neighborFZ[kbw];  
-   //   kbs  = neighborFZ[kbs];  
-   //   kbsw = neighborFZ[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NWT = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NWT  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NWT);
-	  ////vx2_NWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NWT);
-	  ////vx3_NWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NWT);
-
-   //   //kxyFromfcNEQ_NWT    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NWT) - ((vx1_NWT*vx2_NWT)));
-   //   //kyzFromfcNEQ_NWT    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NWT) - ((vx2_NWT*vx3_NWT)));
-   //   //kxzFromfcNEQ_NWT    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NWT) - ((vx1_NWT*vx3_NWT)));
-   //   //kxxMyyFromfcNEQ_NWT = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NWT) - ((vx1_NWT*vx1_NWT-vx2_NWT*vx2_NWT)));
-   //   //kxxMzzFromfcNEQ_NWT = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NWT) - ((vx1_NWT*vx1_NWT-vx3_NWT*vx3_NWT)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NWT += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NWT  += vx1_tmp;
-	  //vx2_NWT  += vx2_tmp;
-	  //vx3_NWT  += vx3_tmp;
-
-   //   drho_NWT *= c1o2;
-   //   vx1_NWT  *= c1o2;
-	  //vx2_NWT  *= c1o2;
-	  //vx3_NWT  *= c1o2;
-
-   //   kxyFromfcNEQ_NWT    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NWT    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NWT    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NWT += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NWT += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NWT    *= c1o2;
-	  //kyzFromfcNEQ_NWT    *= c1o2;
-	  //kxzFromfcNEQ_NWT    *= c1o2;
-	  //kxxMyyFromfcNEQ_NWT *= c1o2;
-	  //kxxMzzFromfcNEQ_NWT *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NET//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kw;
-   //   kw   = neighborFX[kw];   
-   //   ks   = ksw;   
-   //   kb   = kbw;   
-   //   ksw  = neighborFX[ksw];  
-   //   kbw  = neighborFX[kbw];  
-   //   kbs  = kbsw;  
-   //   kbsw = neighborFX[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NET = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NET  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NET);
-	  ////vx2_NET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NET);
-	  ////vx3_NET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NET);
-
-   //   //kxyFromfcNEQ_NET    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NET) - ((vx1_NET*vx2_NET)));
-   //   //kyzFromfcNEQ_NET    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NET) - ((vx2_NET*vx3_NET)));
-   //   //kxzFromfcNEQ_NET    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NET) - ((vx1_NET*vx3_NET)));
-   //   //kxxMyyFromfcNEQ_NET = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NET) - ((vx1_NET*vx1_NET-vx2_NET*vx2_NET)));
-   //   //kxxMzzFromfcNEQ_NET = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NET) - ((vx1_NET*vx1_NET-vx3_NET*vx3_NET)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NET += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NET  += vx1_tmp;
-	  //vx2_NET  += vx2_tmp;
-	  //vx3_NET  += vx3_tmp;
-
-   //   drho_NET *= c1o2;
-   //   vx1_NET  *= c1o2;
-	  //vx2_NET  *= c1o2;
-	  //vx3_NET  *= c1o2;
-
-   //   kxyFromfcNEQ_NET    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NET    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NET    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NET += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NET += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NET    *= c1o2;
-	  //kyzFromfcNEQ_NET    *= c1o2;
-	  //kxzFromfcNEQ_NET    *= c1o2;
-	  //kxxMyyFromfcNEQ_NET *= c1o2;
-	  //kxxMzzFromfcNEQ_NET *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NEB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kb   = kzero;   
-   //   kbw  = kw;  
-   //   kbs  = ks;  
-   //   kbsw = ksw; 
-   //   kzero= k0w;
-   //   kw   = neighborFX[k0w];   
-   //   ks   = k0sw;   
-   //   ksw  = neighborFX[k0sw];  
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NEB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NEB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NEB);
-	  ////vx2_NEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NEB);
-	  ////vx3_NEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NEB);
-
-   //   //kxyFromfcNEQ_NEB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NEB) - ((vx1_NEB*vx2_NEB)));
-   //   //kyzFromfcNEQ_NEB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NEB) - ((vx2_NEB*vx3_NEB)));
-   //   //kxzFromfcNEQ_NEB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NEB) - ((vx1_NEB*vx3_NEB)));
-   //   //kxxMyyFromfcNEQ_NEB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NEB) - ((vx1_NEB*vx1_NEB-vx2_NEB*vx2_NEB)));
-   //   //kxxMzzFromfcNEQ_NEB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NEB) - ((vx1_NEB*vx1_NEB-vx3_NEB*vx3_NEB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NEB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NEB  += vx1_tmp;
-	  //vx2_NEB  += vx2_tmp;
-	  //vx3_NEB  += vx3_tmp;
-
-   //   drho_NEB *= c1o2;
-   //   vx1_NEB  *= c1o2;
-	  //vx2_NEB  *= c1o2;
-	  //vx3_NEB  *= c1o2;
-
-   //   kxyFromfcNEQ_NEB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NEB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NEB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NEB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NEB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NEB    *= c1o2;
-	  //kyzFromfcNEQ_NEB    *= c1o2;
-	  //kxzFromfcNEQ_NEB    *= c1o2;
-	  //kxxMyyFromfcNEQ_NEB *= c1o2;
-	  //kxxMzzFromfcNEQ_NEB *= c1o2;
-	  //
-	  //
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //kxyFromfcNEQ_SWB    = zero;
-	  //kyzFromfcNEQ_SWB    = zero;
-	  //kxzFromfcNEQ_SWB    = zero;
-	  //kxxMyyFromfcNEQ_SWB = zero;
-	  //kxxMzzFromfcNEQ_SWB = zero;
-	  //kxyFromfcNEQ_SWT    = zero;
-	  //kyzFromfcNEQ_SWT    = zero;
-	  //kxzFromfcNEQ_SWT    = zero;
-	  //kxxMyyFromfcNEQ_SWT = zero;
-	  //kxxMzzFromfcNEQ_SWT = zero;
-	  //kxyFromfcNEQ_SET    = zero;
-	  //kyzFromfcNEQ_SET    = zero;
-	  //kxzFromfcNEQ_SET    = zero;
-	  //kxxMyyFromfcNEQ_SET = zero;
-	  //kxxMzzFromfcNEQ_SET = zero;
-	  //kxyFromfcNEQ_SEB    = zero;
-	  //kyzFromfcNEQ_SEB    = zero;
-	  //kxzFromfcNEQ_SEB    = zero;
-	  //kxxMyyFromfcNEQ_SEB = zero;
-	  //kxxMzzFromfcNEQ_SEB = zero;
-	  //kxyFromfcNEQ_NWB    = zero;
-	  //kyzFromfcNEQ_NWB    = zero;
-	  //kxzFromfcNEQ_NWB    = zero;
-	  //kxxMyyFromfcNEQ_NWB = zero;
-	  //kxxMzzFromfcNEQ_NWB = zero;
-	  //kxyFromfcNEQ_NWT    = zero;
-	  //kyzFromfcNEQ_NWT    = zero;
-	  //kxzFromfcNEQ_NWT    = zero;
-	  //kxxMyyFromfcNEQ_NWT = zero;
-	  //kxxMzzFromfcNEQ_NWT = zero;
-	  //kxyFromfcNEQ_NET    = zero;
-	  //kyzFromfcNEQ_NET    = zero;
-	  //kxzFromfcNEQ_NET    = zero;
-	  //kxxMyyFromfcNEQ_NET = zero;
-	  //kxxMzzFromfcNEQ_NET = zero;
-	  //kxyFromfcNEQ_NEB    = zero;
-	  //kyzFromfcNEQ_NEB    = zero;
-	  //kxzFromfcNEQ_NEB    = zero;
-	  //kxxMyyFromfcNEQ_NEB = zero;
-	  //kxxMzzFromfcNEQ_NEB = zero;
       //////////////////////////////////////////////////////////////////////////
       //3
       //////////////////////////////////////////////////////////////////////////
@@ -13278,8 +12474,8 @@ __global__ void scaleFC_Fix_comp_27(  real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -13296,96 +12492,96 @@ __global__ void scaleFC_Fix_comp_27(  real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -15138,8 +14334,8 @@ __global__ void scaleFC_NSPress_27(   real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -15156,96 +14352,96 @@ __global__ void scaleFC_NSPress_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -16344,8 +15540,8 @@ __global__ void scaleFC_Fix_27(   real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posC, 
                                              unsigned int* posFSWB, 
@@ -16362,96 +15558,96 @@ __global__ void scaleFC_Fix_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -17704,8 +16900,8 @@ __global__ void scaleFCpress27(real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posC, 
                                           unsigned int* posFSWB, 
@@ -17722,96 +16918,96 @@ __global__ void scaleFCpress27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -18629,8 +17825,8 @@ __global__ void scaleFCLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posC, 
                                           unsigned int* posFSWB, 
@@ -18647,96 +17843,96 @@ __global__ void scaleFCLast27( real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -20027,8 +19223,8 @@ __global__ void scaleFCThSMG7(    real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posC, 
                                              unsigned int* posFSWB, 
@@ -20040,127 +19236,124 @@ __global__ void scaleFCThSMG7(    real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
 
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -20900,8 +20093,8 @@ __global__ void scaleFCThS7(   real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posC, 
                                           unsigned int* posFSWB, 
@@ -20912,127 +20105,124 @@ __global__ void scaleFCThS7(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
 
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -21691,8 +20881,8 @@ __global__ void scaleFCThS27(     real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posC, 
                                              unsigned int* posFSWB, 
@@ -21704,187 +20894,184 @@ __global__ void scaleFCThS27(     real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    Distributions27 D27F;
-   D27F.f[DIR_P00   ] = &DD27F[DIR_P00   *size_MatF];
-   D27F.f[DIR_M00   ] = &DD27F[DIR_M00   *size_MatF];
-   D27F.f[DIR_0P0   ] = &DD27F[DIR_0P0   *size_MatF];
-   D27F.f[DIR_0M0   ] = &DD27F[DIR_0M0   *size_MatF];
-   D27F.f[DIR_00P   ] = &DD27F[DIR_00P   *size_MatF];
-   D27F.f[DIR_00M   ] = &DD27F[DIR_00M   *size_MatF];
-   D27F.f[DIR_PP0  ] = &DD27F[DIR_PP0  *size_MatF];
-   D27F.f[DIR_MM0  ] = &DD27F[DIR_MM0  *size_MatF];
-   D27F.f[DIR_PM0  ] = &DD27F[DIR_PM0  *size_MatF];
-   D27F.f[DIR_MP0  ] = &DD27F[DIR_MP0  *size_MatF];
-   D27F.f[DIR_P0P  ] = &DD27F[DIR_P0P  *size_MatF];
-   D27F.f[DIR_M0M  ] = &DD27F[DIR_M0M  *size_MatF];
-   D27F.f[DIR_P0M  ] = &DD27F[DIR_P0M  *size_MatF];
-   D27F.f[DIR_M0P  ] = &DD27F[DIR_M0P  *size_MatF];
-   D27F.f[DIR_0PP  ] = &DD27F[DIR_0PP  *size_MatF];
-   D27F.f[DIR_0MM  ] = &DD27F[DIR_0MM  *size_MatF];
-   D27F.f[DIR_0PM  ] = &DD27F[DIR_0PM  *size_MatF];
-   D27F.f[DIR_0MP  ] = &DD27F[DIR_0MP  *size_MatF];
-   D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF];
-   D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF];
-   D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF];
-   D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF];
-   D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF];
-   D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF];
-   D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF];
-   D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF];
-   D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF];
+   D27F.f[DIR_P00] = &DD27F[DIR_P00 * numberOfLBnodesFine];
+   D27F.f[DIR_M00] = &DD27F[DIR_M00 * numberOfLBnodesFine];
+   D27F.f[DIR_0P0] = &DD27F[DIR_0P0 * numberOfLBnodesFine];
+   D27F.f[DIR_0M0] = &DD27F[DIR_0M0 * numberOfLBnodesFine];
+   D27F.f[DIR_00P] = &DD27F[DIR_00P * numberOfLBnodesFine];
+   D27F.f[DIR_00M] = &DD27F[DIR_00M * numberOfLBnodesFine];
+   D27F.f[DIR_PP0] = &DD27F[DIR_PP0 * numberOfLBnodesFine];
+   D27F.f[DIR_MM0] = &DD27F[DIR_MM0 * numberOfLBnodesFine];
+   D27F.f[DIR_PM0] = &DD27F[DIR_PM0 * numberOfLBnodesFine];
+   D27F.f[DIR_MP0] = &DD27F[DIR_MP0 * numberOfLBnodesFine];
+   D27F.f[DIR_P0P] = &DD27F[DIR_P0P * numberOfLBnodesFine];
+   D27F.f[DIR_M0M] = &DD27F[DIR_M0M * numberOfLBnodesFine];
+   D27F.f[DIR_P0M] = &DD27F[DIR_P0M * numberOfLBnodesFine];
+   D27F.f[DIR_M0P] = &DD27F[DIR_M0P * numberOfLBnodesFine];
+   D27F.f[DIR_0PP] = &DD27F[DIR_0PP * numberOfLBnodesFine];
+   D27F.f[DIR_0MM] = &DD27F[DIR_0MM * numberOfLBnodesFine];
+   D27F.f[DIR_0PM] = &DD27F[DIR_0PM * numberOfLBnodesFine];
+   D27F.f[DIR_0MP] = &DD27F[DIR_0MP * numberOfLBnodesFine];
+   D27F.f[DIR_000] = &DD27F[DIR_000 * numberOfLBnodesFine];
+   D27F.f[DIR_PPP] = &DD27F[DIR_PPP * numberOfLBnodesFine];
+   D27F.f[DIR_MMP] = &DD27F[DIR_MMP * numberOfLBnodesFine];
+   D27F.f[DIR_PMP] = &DD27F[DIR_PMP * numberOfLBnodesFine];
+   D27F.f[DIR_MPP] = &DD27F[DIR_MPP * numberOfLBnodesFine];
+   D27F.f[DIR_PPM] = &DD27F[DIR_PPM * numberOfLBnodesFine];
+   D27F.f[DIR_MMM] = &DD27F[DIR_MMM * numberOfLBnodesFine];
+   D27F.f[DIR_PMM] = &DD27F[DIR_PMM * numberOfLBnodesFine];
+   D27F.f[DIR_MPM] = &DD27F[DIR_MPM * numberOfLBnodesFine];
 
    Distributions27 D27C;
    if (isEvenTimestep==true)
    {
-      D27C.f[DIR_P00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_M00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_P00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
    else
    {
-      D27C.f[DIR_M00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_P00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_M00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -21980,33 +21167,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22067,33 +21254,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22154,33 +21341,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22241,33 +21428,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22338,33 +21525,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22425,33 +21612,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22512,33 +21699,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22599,33 +21786,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22739,32 +21926,32 @@ __global__ void scaleFCThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27C.f[DIR_000])[kzero] =   c8o27* Conc_C*(c1o1-cu_sq);
-      (D27C.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_C*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27C.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_C*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27C.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_C*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27C.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_C*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27C.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_C*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27C.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_C*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27C.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_C*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27C.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_C*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27C.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_C*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27C.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_C*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27C.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_C*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27C.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_C*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27C.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_C*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27C.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_C*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27C.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_C*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27C.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_C*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27C.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_C*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27C.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_C*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27C.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27C.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27C.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27C.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27C.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27C.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27C.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27C.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27C.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_C*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27C.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_C*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27C.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_C*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27C.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_C*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27C.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_C*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27C.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_C*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27C.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_C*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27C.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_C*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27C.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_C*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27C.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_C*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27C.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_C*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27C.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_C*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27C.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_C*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27C.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_C*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27C.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_C*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27C.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_C*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27C.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_C*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27C.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_C*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27C.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27C.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27C.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27C.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27C.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27C.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27C.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27C.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
    }
 }
@@ -22812,8 +21999,8 @@ __global__ void scaleFCEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-                                        unsigned int size_MatC, 
-                                        unsigned int size_MatF, 
+                                        unsigned long long numberOfLBnodesCoarse, 
+                                        unsigned long long numberOfLBnodesFine, 
                                         bool isEvenTimestep,
                                         unsigned int* posC, 
                                         unsigned int* posFSWB, 
@@ -22830,96 +22017,96 @@ __global__ void scaleFCEff27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -23791,8 +22978,8 @@ __global__ void scaleFC27(real* DC,
                                      unsigned int* neighborFX,
                                      unsigned int* neighborFY,
                                      unsigned int* neighborFZ,
-										       unsigned int size_MatC, 
-										       unsigned int size_MatF, 
+										       unsigned long long numberOfLBnodesCoarse, 
+										       unsigned long long numberOfLBnodesFine, 
 										       bool isEvenTimestep,
                                      unsigned int* posC, 
                                      unsigned int* posFSWB, 
@@ -23808,96 +22995,96 @@ __global__ void scaleFC27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
          *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
          *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu
index e7fe8b50637e97b9c8cc34025216f4d02e684c55..3b108ad4ae43bd63698f3516a207630214695797 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu
@@ -23,8 +23,8 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 													 unsigned int* neighborFX,
 													 unsigned int* neighborFY,
 													 unsigned int* neighborFZ,
-													 unsigned int size_MatC, 
-													 unsigned int size_MatF, 
+													 unsigned long long numberOfLBnodesCoarse, 
+													 unsigned long long numberOfLBnodesFine, 
 													 bool isEvenTimestep,
 													 unsigned int* posC, 
 													 unsigned int* posFSWB, 
@@ -44,33 +44,33 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[DIR_P00   *size_MatF];
-   fM00source = &DF[DIR_M00   *size_MatF];
-   f0P0source = &DF[DIR_0P0   *size_MatF];
-   f0M0source = &DF[DIR_0M0   *size_MatF];
-   f00Psource = &DF[DIR_00P   *size_MatF];
-   f00Msource = &DF[DIR_00M   *size_MatF];
-   fPP0source = &DF[DIR_PP0  *size_MatF];
-   fMM0source = &DF[DIR_MM0  *size_MatF];
-   fPM0source = &DF[DIR_PM0  *size_MatF];
-   fMP0source = &DF[DIR_MP0  *size_MatF];
-   fP0Psource = &DF[DIR_P0P  *size_MatF];
-   fM0Msource = &DF[DIR_M0M  *size_MatF];
-   fP0Msource = &DF[DIR_P0M  *size_MatF];
-   fM0Psource = &DF[DIR_M0P  *size_MatF];
-   f0PPsource = &DF[DIR_0PP  *size_MatF];
-   f0MMsource = &DF[DIR_0MM  *size_MatF];
-   f0PMsource = &DF[DIR_0PM  *size_MatF];
-   f0MPsource = &DF[DIR_0MP  *size_MatF];
-   f000source = &DF[DIR_000*size_MatF];
-   fMMMsource = &DF[DIR_MMM *size_MatF];
-   fMMPsource = &DF[DIR_MMP *size_MatF];
-   fMPPsource = &DF[DIR_MPP *size_MatF];
-   fMPMsource = &DF[DIR_MPM *size_MatF];
-   fPPMsource = &DF[DIR_PPM *size_MatF];
-   fPPPsource = &DF[DIR_PPP *size_MatF];
-   fPMPsource = &DF[DIR_PMP *size_MatF];
-   fPMMsource = &DF[DIR_PMM *size_MatF];
+   fP00source = &DF[DIR_P00 * numberOfLBnodesFine];
+   fM00source = &DF[DIR_M00 * numberOfLBnodesFine];
+   f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine];
+   f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine];
+   f00Psource = &DF[DIR_00P * numberOfLBnodesFine];
+   f00Msource = &DF[DIR_00M * numberOfLBnodesFine];
+   fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine];
+   fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine];
+   fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine];
+   fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine];
+   f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine];
+   f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine];
+   f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine];
+   f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine];
+   f000source = &DF[DIR_000 * numberOfLBnodesFine];
+   fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine];
+   fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine];
+   fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine];
+   fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine];
+   fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine];
+   fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine];
+   fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine];
+   fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -79,83 +79,83 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[DIR_P00   *size_MatC];
-	   fM00dest = &DC[DIR_M00   *size_MatC];
-	   f0P0dest = &DC[DIR_0P0   *size_MatC];
-	   f0M0dest = &DC[DIR_0M0   *size_MatC];
-	   f00Pdest = &DC[DIR_00P   *size_MatC];
-	   f00Mdest = &DC[DIR_00M   *size_MatC];
-	   fPP0dest = &DC[DIR_PP0  *size_MatC];
-	   fMM0dest = &DC[DIR_MM0  *size_MatC];
-	   fPM0dest = &DC[DIR_PM0  *size_MatC];
-	   fMP0dest = &DC[DIR_MP0  *size_MatC];
-	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
-	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
-	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
-	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
-	   f0PPdest = &DC[DIR_0PP  *size_MatC];
-	   f0MMdest = &DC[DIR_0MM  *size_MatC];
-	   f0PMdest = &DC[DIR_0PM  *size_MatC];
-	   f0MPdest = &DC[DIR_0MP  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_MMM *size_MatC];
-	   fMMPdest = &DC[DIR_MMP *size_MatC];
-	   fMPPdest = &DC[DIR_MPP *size_MatC];
-	   fMPMdest = &DC[DIR_MPM *size_MatC];
-	   fPPMdest = &DC[DIR_PPM *size_MatC];
-	   fPPPdest = &DC[DIR_PPP *size_MatC];
-	   fPMPdest = &DC[DIR_PMP *size_MatC];
-	   fPMMdest = &DC[DIR_PMM *size_MatC];
+	   fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
    } 
    else
    {
-	   fP00dest = &DC[DIR_M00   *size_MatC];
-	   fM00dest = &DC[DIR_P00   *size_MatC];
-	   f0P0dest = &DC[DIR_0M0   *size_MatC];
-	   f0M0dest = &DC[DIR_0P0   *size_MatC];
-	   f00Pdest = &DC[DIR_00M   *size_MatC];
-	   f00Mdest = &DC[DIR_00P   *size_MatC];
-	   fPP0dest = &DC[DIR_MM0  *size_MatC];
-	   fMM0dest = &DC[DIR_PP0  *size_MatC];
-	   fPM0dest = &DC[DIR_MP0  *size_MatC];
-	   fMP0dest = &DC[DIR_PM0  *size_MatC];
-	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
-	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
-	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
-	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
-	   f0PPdest = &DC[DIR_0MM  *size_MatC];
-	   f0MMdest = &DC[DIR_0PP  *size_MatC];
-	   f0PMdest = &DC[DIR_0MP  *size_MatC];
-	   f0MPdest = &DC[DIR_0PM  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_PPP *size_MatC];
-	   fMMPdest = &DC[DIR_PPM *size_MatC];
-	   fMPPdest = &DC[DIR_PMM *size_MatC];
-	   fMPMdest = &DC[DIR_PMP *size_MatC];
-	   fPPMdest = &DC[DIR_MMP *size_MatC];
-	   fPPPdest = &DC[DIR_MMM *size_MatC];
-	   fPMPdest = &DC[DIR_MPM *size_MatC];
-	   fPMMdest = &DC[DIR_MPP *size_MatC];
+	   fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
    }
 
    Distributions6 G;
    if (isEvenTimestep == true)
    {
-	   G.g[DIR_P00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_M00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
    else
    {
-	   G.g[DIR_M00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_P00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -1270,8 +1270,8 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -1291,33 +1291,33 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[DIR_P00   *size_MatF];
-   fM00source = &DF[DIR_M00   *size_MatF];
-   f0P0source = &DF[DIR_0P0   *size_MatF];
-   f0M0source = &DF[DIR_0M0   *size_MatF];
-   f00Psource = &DF[DIR_00P   *size_MatF];
-   f00Msource = &DF[DIR_00M   *size_MatF];
-   fPP0source = &DF[DIR_PP0  *size_MatF];
-   fMM0source = &DF[DIR_MM0  *size_MatF];
-   fPM0source = &DF[DIR_PM0  *size_MatF];
-   fMP0source = &DF[DIR_MP0  *size_MatF];
-   fP0Psource = &DF[DIR_P0P  *size_MatF];
-   fM0Msource = &DF[DIR_M0M  *size_MatF];
-   fP0Msource = &DF[DIR_P0M  *size_MatF];
-   fM0Psource = &DF[DIR_M0P  *size_MatF];
-   f0PPsource = &DF[DIR_0PP  *size_MatF];
-   f0MMsource = &DF[DIR_0MM  *size_MatF];
-   f0PMsource = &DF[DIR_0PM  *size_MatF];
-   f0MPsource = &DF[DIR_0MP  *size_MatF];
-   f000source = &DF[DIR_000*size_MatF];
-   fMMMsource = &DF[DIR_MMM *size_MatF];
-   fMMPsource = &DF[DIR_MMP *size_MatF];
-   fMPPsource = &DF[DIR_MPP *size_MatF];
-   fMPMsource = &DF[DIR_MPM *size_MatF];
-   fPPMsource = &DF[DIR_PPM *size_MatF];
-   fPPPsource = &DF[DIR_PPP *size_MatF];
-   fPMPsource = &DF[DIR_PMP *size_MatF];
-   fPMMsource = &DF[DIR_PMM *size_MatF];
+   fP00source = &DF[DIR_P00 * numberOfLBnodesFine];
+   fM00source = &DF[DIR_M00 * numberOfLBnodesFine];
+   f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine];
+   f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine];
+   f00Psource = &DF[DIR_00P * numberOfLBnodesFine];
+   f00Msource = &DF[DIR_00M * numberOfLBnodesFine];
+   fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine];
+   fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine];
+   fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine];
+   fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine];
+   f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine];
+   f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine];
+   f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine];
+   f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine];
+   f000source = &DF[DIR_000 * numberOfLBnodesFine];
+   fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine];
+   fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine];
+   fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine];
+   fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine];
+   fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine];
+   fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine];
+   fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine];
+   fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -1326,83 +1326,83 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[DIR_P00   *size_MatC];
-	   fM00dest = &DC[DIR_M00   *size_MatC];
-	   f0P0dest = &DC[DIR_0P0   *size_MatC];
-	   f0M0dest = &DC[DIR_0M0   *size_MatC];
-	   f00Pdest = &DC[DIR_00P   *size_MatC];
-	   f00Mdest = &DC[DIR_00M   *size_MatC];
-	   fPP0dest = &DC[DIR_PP0  *size_MatC];
-	   fMM0dest = &DC[DIR_MM0  *size_MatC];
-	   fPM0dest = &DC[DIR_PM0  *size_MatC];
-	   fMP0dest = &DC[DIR_MP0  *size_MatC];
-	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
-	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
-	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
-	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
-	   f0PPdest = &DC[DIR_0PP  *size_MatC];
-	   f0MMdest = &DC[DIR_0MM  *size_MatC];
-	   f0PMdest = &DC[DIR_0PM  *size_MatC];
-	   f0MPdest = &DC[DIR_0MP  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_MMM *size_MatC];
-	   fMMPdest = &DC[DIR_MMP *size_MatC];
-	   fMPPdest = &DC[DIR_MPP *size_MatC];
-	   fMPMdest = &DC[DIR_MPM *size_MatC];
-	   fPPMdest = &DC[DIR_PPM *size_MatC];
-	   fPPPdest = &DC[DIR_PPP *size_MatC];
-	   fPMPdest = &DC[DIR_PMP *size_MatC];
-	   fPMMdest = &DC[DIR_PMM *size_MatC];
+	   fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
    } 
    else
    {
-	   fP00dest = &DC[DIR_M00   *size_MatC];
-	   fM00dest = &DC[DIR_P00   *size_MatC];
-	   f0P0dest = &DC[DIR_0M0   *size_MatC];
-	   f0M0dest = &DC[DIR_0P0   *size_MatC];
-	   f00Pdest = &DC[DIR_00M   *size_MatC];
-	   f00Mdest = &DC[DIR_00P   *size_MatC];
-	   fPP0dest = &DC[DIR_MM0  *size_MatC];
-	   fMM0dest = &DC[DIR_PP0  *size_MatC];
-	   fPM0dest = &DC[DIR_MP0  *size_MatC];
-	   fMP0dest = &DC[DIR_PM0  *size_MatC];
-	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
-	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
-	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
-	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
-	   f0PPdest = &DC[DIR_0MM  *size_MatC];
-	   f0MMdest = &DC[DIR_0PP  *size_MatC];
-	   f0PMdest = &DC[DIR_0MP  *size_MatC];
-	   f0MPdest = &DC[DIR_0PM  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_PPP *size_MatC];
-	   fMMPdest = &DC[DIR_PPM *size_MatC];
-	   fMPPdest = &DC[DIR_PMM *size_MatC];
-	   fMPMdest = &DC[DIR_PMP *size_MatC];
-	   fPPMdest = &DC[DIR_MMP *size_MatC];
-	   fPPPdest = &DC[DIR_MMM *size_MatC];
-	   fPMPdest = &DC[DIR_MPM *size_MatC];
-	   fPMMdest = &DC[DIR_MPP *size_MatC];
+	   fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
    }
 
    Distributions6 G;
    if (isEvenTimestep == true)
    {
-	   G.g[DIR_P00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_M00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
    else
    {
-	   G.g[DIR_M00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_P00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
index f4160b89c047a7e6244a5579baae03d30b3c89cb..0724002cffa3a47820664851ffefd1c35dbe0235 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
@@ -32,12 +32,13 @@
 //=======================================================================================
 
 #include "DataTypes.h"
-#include "Kernel/Utilities/DistributionHelper.cuh"
-#include "Kernel/Utilities/ChimeraTransformation.h"
-#include "Kernel/Utilities/ScalingHelperFunctions.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
+#include "LBM/GPUHelperFunctions/ScalingUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief Calculate the interpolated distributions on the fine destination nodes
@@ -226,8 +227,8 @@ __global__ void scaleCF_compressible(
     unsigned int* neighborXfine,
     unsigned int* neighborYfine,
     unsigned int* neighborZfine,
-    unsigned int numberOfLBnodesCoarse, 
-    unsigned int numberOfLBnodesFine, 
+    unsigned long long numberOfLBnodesCoarse, 
+    unsigned long long numberOfLBnodesFine, 
     bool isEvenTimestep,
     unsigned int* indicesCoarseMMM, 
     unsigned int* indicesFineMMM, 
@@ -237,13 +238,13 @@ __global__ void scaleCF_compressible(
     OffCF offsetCF)
 {
     ////////////////////////////////////////////////////////////////////////////////
-    //! - Get the thread index coordinates from threadId_100, blockId_100, blockDim and gridDim.
+    //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
-    const unsigned k_thread = vf::gpu::getNodeIndex();
+    const unsigned nodeIndex = getNodeIndex();
 
     //////////////////////////////////////////////////////////////////////////
     //! - Return for non-interface node
-    if (k_thread >= numberOfInterfaceNodes)
+    if (nodeIndex >= numberOfInterfaceNodes)
         return;
 
     //////////////////////////////////////////////////////////////////////////
@@ -252,8 +253,9 @@ __global__ void scaleCF_compressible(
     //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    Distributions27 distFine   = vf::gpu::getDistributionReferences27(distributionsFine,   numberOfLBnodesFine,   true);
-    Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
+    Distributions27 distFine, distCoarse;
+    getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true);
+    getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
 
     ////////////////////////////////////////////////////////////////////////////////
     //! - declare local variables for source nodes
@@ -289,7 +291,7 @@ __global__ void scaleCF_compressible(
     // source node BSW = MMM
     ////////////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
-    unsigned int k_base_000 = indicesCoarseMMM[k_thread];
+    unsigned int k_base_000 = indicesCoarseMMM[nodeIndex];
     unsigned int k_base_M00 = neighborXcoarse [k_base_000];
     unsigned int k_base_0M0 = neighborYcoarse [k_base_000];
     unsigned int k_base_00M = neighborZcoarse [k_base_000];
@@ -452,119 +454,240 @@ __global__ void scaleCF_compressible(
     real c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011, c_111;
     real d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111;
 
-    a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP -
-            kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP -
-            kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP -
-            c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP +
-            c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP +
-            c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP +
-            c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP +
-            c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP +
-            c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP -
-            c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM +
-            c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP -
-            c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP -
-            kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP +
-            kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP +
-            c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP +
-            c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP +
-            c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP +
-            c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM +
-            c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM +
-            c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM -
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP -
-            c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM +
-            c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP -
-            c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM -
-            c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM -
-            c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM -
-            c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM +
-            c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM +
-            c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM -
-            c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM -
-            c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM +
-            c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) /
-            c64o1;
-    a_100  = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_100  = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_100  = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP +
-            kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP +
-            kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM +
-            c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM +
-            c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM +
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c16o1;
-    b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM +
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) /
-            c8o1;
-    c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP -
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) /
-            c8o1;
-    a_010  = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_010  = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_010  = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM -
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c8o1;
-    b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM -
-            c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP +
-            c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP +
-            kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP -
-            kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM +
-            c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM -
-            c2o1 * vx3_MMP) /
-            c16o1;
-    c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM -
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) /
-            c8o1;
-    a_001  = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1;
-    b_001  = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1;
-    c_001  = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1;
-    a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP -
-            c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM +
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP +
-            c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM -
-            c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP +
-            c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP -
-            c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP +
-            c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c16o1;
-    a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1;
-    b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1;
-    c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1;
-    a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
-    a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
-
-    a_111 = -vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP + vx1_PMM - vx1_PMP - vx1_MMM + vx1_MMP;
-    b_111 = -vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP + vx2_PMM - vx2_PMP - vx2_MMM + vx2_MMP;
-    c_111 = -vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP + vx3_PMM - vx3_PMP - vx3_MMM + vx3_MMP;
+    // a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
+    //         kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP -
+    //         kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP -
+    //         kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP -
+    //         c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP +
+    //         c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP +
+    //         c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP +
+    //         c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP +
+    //         c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP +
+    //         c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP -
+    //         c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP +
+    //         c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
+    //         c64o1;
+    a_000 =
+        c1o64 * (c2o1 * (((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) +
+                         ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)) +
+                         ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) +
+                         ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) +
+                         ((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) - ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_PMP + vx3_MPM) - (vx3_MPP + vx3_PMM))) +
+                 c8o1 * (((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM))) +
+                 ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+                 ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) +
+                 ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+                 ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+
+    // b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM +
+    //         c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP -
+    //         c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP -
+    //         kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP +
+    //         kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP +
+    //         c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP +
+    //         c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP +
+    //         c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP +
+    //         c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
+    //         c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM +
+    //         c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM +
+    //         c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM -
+    //         c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
+    //         c64o1;
+    b_000 =
+        c1o64 * (c2o1 * (((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) +
+                         ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) +
+                         ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) +
+                         ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)) +
+                         ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) +
+                         ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)) +
+                         ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPM + vx1_MPP) + (vx1_PMM + vx1_PMP)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) +
+                 c8o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) +
+                 ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+                 ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+
+    // c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
+    //         kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP -
+    //         c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM +
+    //         c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP -
+    //         c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM -
+    //         c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM -
+    //         c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM -
+    //         c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM +
+    //         c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM +
+    //         c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM -
+    //         c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM -
+    //         c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM +
+    //         c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) /
+    //         c64o1;
+    c_000 =
+        c1o64 * (c2o1 * (((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+                         ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)) +
+                         ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) +
+                         ((kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) +
+                         ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) +
+                         ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM)) +
+                         ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_MPP + vx1_PMM)) +
+                         ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) - (vx2_MPM + vx2_PMP))) +
+                 c8o1 * (((vx3_PPP + vx3_MMM) + (vx3_PPM + vx3_MMP)) + ((vx3_PMM + vx3_MPP) + (vx3_PMP + vx3_MPM))) +
+                 ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) +
+                 ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)));
+
+    // a_100  = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
+    a_100 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_PMM - vx1_MPP) + (vx1_PMP - vx1_MPM)));
+
+    // b_100  = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
+    b_100 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_PMM - vx2_MPP) + (vx2_PMP - vx2_MPM)));
+
+    // c_100  = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
+    c_100 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_PMM - vx3_MPP) + (vx3_PMP - vx3_MPM)));
+
+    // a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
+    //         kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP +
+    //         kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP +
+    //         kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM +
+    //         c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM +
+    //         c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM +
+    //         c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
+    //         c16o1;
+    a_200 =
+        c1o16 * (c2o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM - vx2_MPP)) + ((vx2_MMP - vx2_PMM) - (vx2_MPM + vx2_PMP)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MPP)) + ((vx3_MPM + vx3_PMP) - (vx3_MMP + vx3_PMM))) +
+                 ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) +
+                 ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) +
+                 ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) +
+                 ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+
+    // b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM +
+    //         kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP +
+    //         c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) /
+    //         c8o1;
+    b_200 =
+        c1o8 * (c2o1 * (-((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_MPM + vx1_PMP))) +
+                ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) +
+                ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)));
+
+    // c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM +
+    //          kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP - c2o1 *
+    //          vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) /
+    //         c8o1;
+    c_200 = c1o8 * (c2o1 * (((vx1_PPM + vx1_MMP) - (vx1_PPP + vx1_MMM)) + ((vx1_MPP + vx1_PMM) - (vx1_MPM + vx1_PMP))) +
+                    ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) +
+                    ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM)));
+
+    // a_010 = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
+    a_010 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_MPP - vx1_PMM) + (vx1_MPM - vx1_PMP)));
+
+    // b_010 = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
+    b_010 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_MPP - vx2_PMM) + (vx2_MPM - vx2_PMP)));
+
+    // c_010 = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
+    c_010 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_MPP - vx3_PMM) + (vx3_MPM - vx3_PMP)));
+
+    // a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM -
+    //         kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP +
+    //         c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
+    //         c8o1;
+    a_020 =
+        c1o8 * (c2o1 * (-((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) +
+                ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) +
+                ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)));
+
+    // b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM -
+    //         c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP +
+    //         c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP +
+    //         kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP -
+    //         kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
+    //         c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM +
+    //         c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM -
+    //         c2o1 * vx3_MMP) /
+    //         c16o1;
+    b_020 =
+        c1o16 * (c2o1 * (((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+                         ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) +
+                         ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) +
+                 ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) +
+                 ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+
+    // c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM -
+    //          kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP + c2o1 *
+    //          vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) /
+    //         c8o1;
+    c_020 = c1o8 * (c2o1 * (((vx2_MMP + vx2_PPM) - (vx2_PPP + vx2_MMM)) + ((vx2_PMP + vx2_MPM) - (vx2_MPP + vx2_PMM))) +
+                    ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) +
+                    ((kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)));
+
+    // a_001  = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1;
+    a_001 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPP - vx1_PMM) + (vx1_PMP - vx1_MPM)));
+
+    // b_001  = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1;
+    b_001 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPP - vx2_PMM) + (vx2_PMP - vx2_MPM)));
+
+    // c_001  = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1;
+    c_001 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPP - vx3_PMM) + (vx3_PMP - vx3_MPM)));
+
+    // a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM +
+    //         kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP -
+    //         c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
+    //         c8o1;
+    a_002 = c1o8 * (c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPP + vx3_PMM) - (vx3_PMP + vx3_MPM))) +
+                    ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) +
+                    ((kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM) + (kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM)));
+
+    // b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM +
+    //          kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP + c2o1 *
+    //          vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
+    //         c8o1;
+    b_002 = c1o8 * (c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))) +
+                    ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) +
+                    ((kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM) + (kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM)));
+
+    // c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
+    //         kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP +
+    //         c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM -
+    //         c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP +
+    //         c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP +
+    //         c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP -
+    //         c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP +
+    //         c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
+    //         c16o1;
+    c_002 =
+        c1o16 * (c2o1 * (((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) +
+                         ((kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP) + (kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP)) +
+                         ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)) +
+                         ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))) +
+                 ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+                 ((kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM) + (kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM)));
+
+    // a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1;
+    // b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1;
+    // c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1;
+    a_110 = c1o2 * (((vx1_PPP + vx1_MMM) + (vx1_MMP + vx1_PPM)) - ((vx1_MPM + vx1_PMP) + (vx1_PMM + vx1_MPP)));
+    b_110 = c1o2 * (((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) - ((vx2_MPM + vx2_PMP) + (vx2_PMM + vx2_MPP)));
+    c_110 = c1o2 * (((vx3_PPP + vx3_MMM) + (vx3_MMP + vx3_PPM)) - ((vx3_MPM + vx3_PMP) + (vx3_PMM + vx3_MPP)));
+
+    // a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
+    // b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
+    // c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
+    a_101 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)));
+    b_101 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPM + vx2_PMP) - (vx2_PMM + vx2_MPP)));
+    c_101 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP)));
+
+    // a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
+    // b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
+    // c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
+    a_011 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_PMM + vx1_MPP) - (vx1_MPM + vx1_PMP)));
+    b_011 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP)));
+    c_011 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_PMM + vx3_MPP) - (vx3_MPM + vx3_PMP)));
+
+    // a_111 = -vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP + vx1_PMM - vx1_PMP - vx1_MMM + vx1_MMP;
+    // b_111 = -vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP + vx2_PMM - vx2_PMP - vx2_MMM + vx2_MMP;
+    // c_111 = -vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP + vx3_PMM - vx3_PMP - vx3_MMM + vx3_MMP;
+    a_111 = ((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPM - vx1_PMP) + (vx1_PMM - vx1_MPP));
+    b_111 = ((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPM - vx2_PMP) + (vx2_PMM - vx2_MPP));
+    c_111 = ((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPM - vx3_PMP) + (vx3_PMM - vx3_MPP));
 
     //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -618,9 +741,9 @@ __global__ void scaleCF_compressible(
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set the relative position of the offset cell {-1, 0, 1}
     //!
-    real xoff    = offsetCF.xOffCF[k_thread];
-    real yoff    = offsetCF.yOffCF[k_thread];
-    real zoff    = offsetCF.zOffCF[k_thread];
+    real xoff    = offsetCF.xOffCF[nodeIndex];
+    real yoff    = offsetCF.yOffCF[nodeIndex];
+    real zoff    = offsetCF.zOffCF[nodeIndex];
 
     real xoff_sq = xoff * xoff;
     real yoff_sq = yoff * yoff;
@@ -632,14 +755,29 @@ __global__ void scaleCF_compressible(
         ((xoff != c0o1) || (yoff != c0o1) || (zoff != c0o1))
         ? c0o1
         : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001);
-    d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP) * c1o8;
-    d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4;
-    d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2;
-    d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2;
-    d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2;
-    d_111 =  -drho_PPM + drho_PPP + drho_MPM - drho_MPP + drho_PMM - drho_PMP - drho_MMM + drho_MMP;
+    // d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP) * c1o8;
+    d_000 = c1o8 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM)));
+
+    // d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4;
+    d_100 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_PMM - drho_MPP) + (drho_PMP - drho_MPM)));
+
+    // d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4;
+    d_010 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_MPP - drho_PMM) + (drho_MPM - drho_PMP)));
+
+    // d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4;
+    d_001 = c1o4 * (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_MPP - drho_PMM) + (drho_PMP - drho_MPM)));
+
+    // d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2;
+    d_110 = c1o2 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) - ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM)));
+
+    // d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2;
+    d_101 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMP + drho_MPM) - (drho_PMM + drho_MPP)));
+
+    // d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2;
+    d_011 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) - (drho_PMP + drho_MPM)));
+
+    // d_111 =  -drho_PPM + drho_PPP + drho_MPM - drho_MPP + drho_PMM - drho_PMP - drho_MMM + drho_MMP;
+    d_111 = (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_PMM - drho_MPP) + (drho_MPM - drho_PMP)));
 
     //////////////////////////////////////////////////////////////////////////
     //! - Extrapolation for refinement in to the wall (polynomial coefficients)
@@ -768,7 +906,7 @@ __global__ void scaleCF_compressible(
 
     //////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
-    k_base_000 = indicesFineMMM[k_thread];
+    k_base_000 = indicesFineMMM[nodeIndex];
     k_base_M00 = neighborXfine [k_base_000];
     k_base_0M0 = neighborYfine [k_base_000];
     k_base_00M = neighborZfine [k_base_000];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
index 3ab8b9d20279eff341ca42d20cee9fe7550a2039..e7d999d108e59bca98bf87b813f9479f1c601266 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
@@ -31,12 +31,13 @@
 //! \author Martin Schoenherr, Anna Wellmann
 //=======================================================================================
 
-#include "Kernel/Utilities/DistributionHelper.cuh"
-#include "Kernel/Utilities/ChimeraTransformation.h"
-#include "Kernel/Utilities/ScalingHelperFunctions.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/ScalingUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief Interpolate from fine to coarse
@@ -54,8 +55,8 @@ __global__ void scaleFC_compressible(
     unsigned int *neighborXfine,
     unsigned int *neighborYfine,
     unsigned int *neighborZfine,
-    unsigned int numberOfLBnodesCoarse,
-    unsigned int numberOfLBnodesFine,
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
     bool isEvenTimestep,
     unsigned int *indicesCoarse000,
     unsigned int *indicesFineMMM,
@@ -65,13 +66,13 @@ __global__ void scaleFC_compressible(
     OffFC offsetFC)
 {
     ////////////////////////////////////////////////////////////////////////////////
-    //! - Get the thread index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
-    const unsigned k_thread = vf::gpu::getNodeIndex();
+    const unsigned nodeIndex = getNodeIndex();
 
     //////////////////////////////////////////////////////////////////////////
     //! - Return for non-interface node
-    if (k_thread >= numberOfInterfaceNodes)
+    if (nodeIndex >= numberOfInterfaceNodes)
         return;
 
     //////////////////////////////////////////////////////////////////////////
@@ -80,8 +81,9 @@ __global__ void scaleFC_compressible(
     //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    Distributions27 distFine   = vf::gpu::getDistributionReferences27(distributionsFine,   numberOfLBnodesFine,   true);
-    Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
+    Distributions27 distFine, distCoarse;
+    getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true);
+    getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
 
     ////////////////////////////////////////////////////////////////////////////////
     //! - declare local variables for source nodes
@@ -117,7 +119,7 @@ __global__ void scaleFC_compressible(
     // source node BSW = MMM
     //////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
-    unsigned int k_base_000 = indicesFineMMM[k_thread];
+    unsigned int k_base_000 = indicesFineMMM[nodeIndex];
     unsigned int k_base_M00 = neighborXfine [k_base_000];
     unsigned int k_base_0M0 = neighborYfine [k_base_000];
     unsigned int k_base_00M = neighborZfine [k_base_000];
@@ -278,115 +280,120 @@ __global__ void scaleFC_compressible(
     real c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011;
     real d_000, d_100, d_010, d_001, d_110, d_101, d_011;
 
-    a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP -
-            kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP -
-            kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP -
-            c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP +
-            c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP +
-            c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP +
-            c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP +
-            c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP +
-            c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP -
-            c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM +
-            c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP -
-            c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP -
-            kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP +
-            kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP +
-            c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP +
-            c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP +
-            c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP +
-            c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM +
-            c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM +
-            c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM -
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP -
-            c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM +
-            c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP -
-            c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM -
-            c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM -
-            c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM -
-            c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM +
-            c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM +
-            c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM -
-            c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM -
-            c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM +
-            c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) /
-            c64o1;
-    a_100  = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_100  = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_100  = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP +
-            kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP +
-            kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM +
-            c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM +
-            c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM +
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c16o1;
-    b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM +
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) /
-            c8o1;
-    c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP -
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) /
-            c8o1;
-    a_010  = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_010  = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_010  = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM -
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c8o1;
-    b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM -
-            c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP +
-            c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP +
-            kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP -
-            kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM +
-            c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM -
-            c2o1 * vx3_MMP) /
-            c16o1;
-    c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM -
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) /
-            c8o1;
-    a_001  = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1;
-    b_001  = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1;
-    c_001  = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1;
-    a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP -
-            c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM +
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP +
-            c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM -
-            c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP +
-            c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP -
-            c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP +
-            c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c16o1;
-    a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1;
-    b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1;
-    c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1;
-    a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
-    a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
+    a_000 = c1o64 * (
+            c2o1 * (
+            ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)) + 
+            ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + 
+            ((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) - ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_PMP + vx3_MPM) - (vx3_MPP + vx3_PMM))) + 
+            c8o1 * (((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM))) +
+            ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + 
+            ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) +
+            ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + 
+            ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+    b_000 = c1o64 * (
+            c2o1 * (
+            ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + 
+            ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) + 
+            ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + 
+            ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)) + 
+            ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) + 
+            ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)) + 
+            ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPM + vx1_MPP) + (vx1_PMM + vx1_PMP)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) + 
+            c8o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + 
+            ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+            ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+    c_000 = c1o64 * ( 
+            c2o1 * (
+            ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + 
+            ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)) + 
+            ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) + 
+            ((kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + 
+            ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + 
+            ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM)) + 
+            ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_MPP + vx1_PMM)) + 
+            ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) - (vx2_MPM + vx2_PMP))) + 
+            c8o1 * (((vx3_PPP + vx3_MMM) + (vx3_PPM + vx3_MMP)) + ((vx3_PMM + vx3_MPP) + (vx3_PMP + vx3_MPM))) +
+            ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + 
+            ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)));
+
+    a_100 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_PMM - vx1_MPP) + (vx1_PMP - vx1_MPM)));
+    b_100 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_PMM - vx2_MPP) + (vx2_PMP - vx2_MPM)));
+    c_100 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_PMM - vx3_MPP) + (vx3_PMP - vx3_MPM)));
+
+    a_200 = c1o16 * ( 
+            c2o1 * (
+            ((vx2_PPP + vx2_MMM) + (vx2_PPM - vx2_MPP)) + ((vx2_MMP - vx2_PMM) - (vx2_MPM + vx2_PMP)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MPP)) + ((vx3_MPM + vx3_PMP) - (vx3_MMP + vx3_PMM))) + 
+            ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + 
+            ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) + 
+            ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + 
+            ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+    b_200 = c1o8 * (
+            c2o1 * (
+            -((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_MPM + vx1_PMP))) +
+            ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + 
+            ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)));
+    c_200 = c1o8 * (
+            c2o1 * (
+            ((vx1_PPM + vx1_MMP) - (vx1_PPP + vx1_MMM)) + ((vx1_MPP + vx1_PMM) - (vx1_MPM + vx1_PMP))) +
+            ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + 
+            ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM)));
+
+    a_010 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_MPP - vx1_PMM) + (vx1_MPM - vx1_PMP)));
+    b_010 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_MPP - vx2_PMM) + (vx2_MPM - vx2_PMP)));
+    c_010 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_MPP - vx3_PMM) + (vx3_MPM - vx3_PMP)));
+
+    a_020 = c1o8 * (
+            c2o1 * (-((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) +
+            ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + 
+            ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)));
+    b_020 = c1o16 * (
+            c2o1 * (
+            ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+            ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) +
+            ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) +
+            ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + 
+            ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+    c_020 = c1o8 * (
+            c2o1 * (((vx2_MMP + vx2_PPM) - (vx2_PPP + vx2_MMM)) + ((vx2_PMP + vx2_MPM) - (vx2_MPP + vx2_PMM))) +
+            ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) +
+            ((kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)));
+
+    a_001 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPP - vx1_PMM) + (vx1_PMP - vx1_MPM)));
+    b_001 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPP - vx2_PMM) + (vx2_PMP - vx2_MPM)));
+    c_001 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPP - vx3_PMM) + (vx3_PMP - vx3_MPM)));
+
+    a_002 = c1o8 * (
+            c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPP + vx3_PMM) - (vx3_PMP + vx3_MPM))) +
+                    ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) +
+                    ((kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM) + (kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM)));
+    b_002 = c1o8 * (
+            c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))) + 
+                    ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + 
+                    ((kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM) + (kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM)));
+    c_002 = c1o16 * (
+            c2o1 * (
+            ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + 
+            ((kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP) + (kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP)) + 
+            ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)) + 
+            ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))) + 
+            ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+            ((kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM) + (kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM)));
+
+    a_110 = c1o2 * (((vx1_PPP + vx1_MMM) + (vx1_MMP + vx1_PPM)) - ((vx1_MPM + vx1_PMP) + (vx1_PMM + vx1_MPP)));
+    b_110 = c1o2 * (((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) - ((vx2_MPM + vx2_PMP) + (vx2_PMM + vx2_MPP)));
+    c_110 = c1o2 * (((vx3_PPP + vx3_MMM) + (vx3_MMP + vx3_PPM)) - ((vx3_MPM + vx3_PMP) + (vx3_PMM + vx3_MPP)));
+
+    a_101 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)));
+    b_101 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPM + vx2_PMP) - (vx2_PMM + vx2_MPP)));
+    c_101 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP)));
+    
+    a_011 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_PMM + vx1_MPP) - (vx1_MPM + vx1_PMP)));
+    b_011 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP)));
+    c_011 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_PMM + vx3_MPP) - (vx3_MPM + vx3_PMP)));
 
     //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     
@@ -399,9 +406,9 @@ __global__ void scaleFC_compressible(
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set the relative position of the offset cell {-1, 0, 1}
     //!
-    real xoff    = offsetFC.xOffFC[k_thread];
-    real yoff    = offsetFC.yOffFC[k_thread];
-    real zoff    = offsetFC.zOffFC[k_thread];
+    real xoff    = offsetFC.xOffFC[nodeIndex];
+    real yoff    = offsetFC.yOffFC[nodeIndex];
+    real zoff    = offsetFC.zOffFC[nodeIndex];
      
     real xoff_sq = xoff * xoff;
     real yoff_sq = yoff * yoff;
@@ -412,15 +419,14 @@ __global__ void scaleFC_compressible(
     //! 
     real LaplaceRho = 
         ((xoff != c0o1) || (yoff != c0o1) || (zoff != c0o1))
-        ? c0o1
-        : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001);
-    d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP - c2o1 * LaplaceRho) * c1o8;
-    d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4;
-    d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2;
-    d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2;
-    d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2;
+        ? c0o1 : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001);
+    d_000 =  c1o8 * ((((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))) - c2o1 * LaplaceRho);
+    d_100 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_PMM - drho_MPP) + (drho_PMP - drho_MPM)));
+    d_010 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_MPP - drho_PMM) + (drho_MPM - drho_PMP)));
+    d_001 = c1o4 * (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_MPP - drho_PMM) + (drho_PMP - drho_MPM)));
+    d_110 = c1o2 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) - ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM)));
+    d_101 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMP + drho_MPM) - (drho_PMM + drho_MPP)));
+    d_011 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) - (drho_PMP + drho_MPM)));
 
 
     //////////////////////////////////////////////////////////////////////////
@@ -639,7 +645,7 @@ __global__ void scaleFC_compressible(
 
     ////////////////////////////////////////////////////////////////////////////////////
     // index of the destination node and its neighbors
-    k_000 = indicesCoarse000[k_thread];
+    k_000 = indicesCoarse000[nodeIndex];
     k_M00 = neighborXcoarse [k_000];
     k_0M0 = neighborYcoarse [k_000];
     k_00M = neighborZcoarse [k_000];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
index 6d497d2a1ab7ec305bec4f1ad1ed2e2d63c4dc27..23666fdcf6714d30b40b4750c52f129cc472761c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
@@ -15,7 +15,7 @@ __global__ void LBInit27( int myid,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
                                      real* vParabel,
-                                     unsigned int size_Mat,
+                                     unsigned long long numberOfLBnodes,
                                      unsigned int grid_nx, 
                                      unsigned int grid_ny, 
                                      unsigned int grid_nz, 
@@ -24,33 +24,33 @@ __global__ void LBInit27( int myid,
                                      int maxlev)
 {
    Distributions27 D;
-   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    ////////////////////////////////////////////////////////////////////////////////
    unsigned int  k;                   // Zugriff auf arrays im device
    //
@@ -142,32 +142,32 @@ __global__ void LBInit27( int myid,
    real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
    (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
-   (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-   (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-   (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-   (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-   (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-   (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-   (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-   (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-   (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-   (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-   (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-   (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-   (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-   (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-   (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-   (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-   (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-   (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-   (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-   (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-   (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-   (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-   (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-   (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-   (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-   (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+   (D.f[DIR_P00])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+   (D.f[DIR_M00])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+   (D.f[DIR_0P0])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+   (D.f[DIR_0M0])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+   (D.f[DIR_00P])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+   (D.f[DIR_00M])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+   (D.f[DIR_PP0])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+   (D.f[DIR_MM0])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+   (D.f[DIR_PM0])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+   (D.f[DIR_MP0])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+   (D.f[DIR_P0P])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+   (D.f[DIR_M0M])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+   (D.f[DIR_P0M])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+   (D.f[DIR_M0P])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+   (D.f[DIR_0PP])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+   (D.f[DIR_0MM])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+   (D.f[DIR_0PM])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+   (D.f[DIR_0MP])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+   (D.f[DIR_PPP])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+   (D.f[DIR_MMM])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+   (D.f[DIR_PPM])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+   (D.f[DIR_MMP])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+   (D.f[DIR_PMP])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+   (D.f[DIR_MPM])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+   (D.f[DIR_PMM])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+   (D.f[DIR_MPP])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 
 }
 ////////////////////////////////////////////////////////////////////////////////
@@ -191,7 +191,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
                                                 real* ux,
                                                 real* uy,
                                                 real* uz,
-                                                unsigned int size_Mat,
+                                                unsigned long long numberOfLBnodes,
                                                 real* DD,
                                                 real omega,
                                                 bool EvenOrOdd)
@@ -207,7 +207,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
     
-    if(k<size_Mat)
+    if(k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -218,63 +218,63 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
             Distributions27 D;
             if (EvenOrOdd==true)
             {
-                D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-                D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-                D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-                D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-                D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-                D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DD[DIR_000*size_Mat];
-                D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-                D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-                D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-                D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-                D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-                D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-                D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-                D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-                D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-                D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-                D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-                D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-                D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DD[DIR_000*size_Mat];
-                D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-                D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-                D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-                D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-                D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-                D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-                D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-                D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
             }
             //////////////////////////////////////////////////////////////////////////
             real drho = rho[k];//0.0f;//
@@ -397,62 +397,62 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
             real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
             
             (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
-            (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-            (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-            (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-            (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-            (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-            (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-            (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-            (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-            (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-            (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-            (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-            (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-            (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-            (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-            (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-            (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-            (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-            (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-            (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-            (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-            (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-            (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-            (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-            (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-            (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-            (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+            (D.f[DIR_P00])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+            (D.f[DIR_M00])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+            (D.f[DIR_0P0])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+            (D.f[DIR_0M0])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+            (D.f[DIR_00P])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+            (D.f[DIR_00M])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+            (D.f[DIR_PP0])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+            (D.f[DIR_MM0])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+            (D.f[DIR_PM0])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+            (D.f[DIR_MP0])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+            (D.f[DIR_P0P])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+            (D.f[DIR_M0M])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+            (D.f[DIR_P0M])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+            (D.f[DIR_M0P])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+            (D.f[DIR_0PP])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+            (D.f[DIR_0MM])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+            (D.f[DIR_0PM])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+            (D.f[DIR_0MP])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+            (D.f[DIR_PPP])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+            (D.f[DIR_MMM])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+            (D.f[DIR_PPM])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+            (D.f[DIR_MMP])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+            (D.f[DIR_PMP])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+            (D.f[DIR_MPM])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+            (D.f[DIR_PMM])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+            (D.f[DIR_MPP])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 
             //////////////////////////////////////////////////////////////////////////
 
             (D.f[DIR_000])[kzero] += f_ZERO;
-            (D.f[DIR_P00   ])[ke   ] += f_E   ;
-            (D.f[DIR_M00   ])[kw   ] += f_E   ;
-            (D.f[DIR_0P0   ])[kn   ] += f_N   ;
-            (D.f[DIR_0M0   ])[ks   ] += f_N   ;
-            (D.f[DIR_00P   ])[kt   ] += f_T   ;
-            (D.f[DIR_00M   ])[kb   ] += f_T   ;
-            (D.f[DIR_PP0  ])[kne  ] += f_NE  ;
-            (D.f[DIR_MM0  ])[ksw  ] += f_NE  ;
-            (D.f[DIR_PM0  ])[kse  ] += f_SE  ;
-            (D.f[DIR_MP0  ])[knw  ] += f_SE  ;
-            (D.f[DIR_P0P  ])[kte  ] += f_TE  ;
-            (D.f[DIR_M0M  ])[kbw  ] += f_TE  ;
-            (D.f[DIR_P0M  ])[kbe  ] += f_BE  ;
-            (D.f[DIR_M0P  ])[ktw  ] += f_BE  ;
-            (D.f[DIR_0PP  ])[ktn  ] += f_TN  ;
-            (D.f[DIR_0MM  ])[kbs  ] += f_TN  ;
-            (D.f[DIR_0PM  ])[kbn  ] += f_BN  ;
-            (D.f[DIR_0MP  ])[kts  ] += f_BN  ;
-            (D.f[DIR_PPP ])[ktne ] += f_TNE ;
-            (D.f[DIR_MMM ])[kbsw ] += f_TNE ;
-            (D.f[DIR_PPM ])[kbne ] += f_TSW ;
-            (D.f[DIR_MMP ])[ktsw ] += f_TSW ;
-            (D.f[DIR_PMP ])[ktse ] += f_TSE ;
-            (D.f[DIR_MPM ])[kbnw ] += f_TSE ;
-            (D.f[DIR_PMM ])[kbse ] += f_TNW ;
-            (D.f[DIR_MPP ])[ktnw ] += f_TNW ;
+            (D.f[DIR_P00])[ke   ] += f_E   ;
+            (D.f[DIR_M00])[kw   ] += f_E   ;
+            (D.f[DIR_0P0])[kn   ] += f_N   ;
+            (D.f[DIR_0M0])[ks   ] += f_N   ;
+            (D.f[DIR_00P])[kt   ] += f_T   ;
+            (D.f[DIR_00M])[kb   ] += f_T   ;
+            (D.f[DIR_PP0])[kne  ] += f_NE  ;
+            (D.f[DIR_MM0])[ksw  ] += f_NE  ;
+            (D.f[DIR_PM0])[kse  ] += f_SE  ;
+            (D.f[DIR_MP0])[knw  ] += f_SE  ;
+            (D.f[DIR_P0P])[kte  ] += f_TE  ;
+            (D.f[DIR_M0M])[kbw  ] += f_TE  ;
+            (D.f[DIR_P0M])[kbe  ] += f_BE  ;
+            (D.f[DIR_M0P])[ktw  ] += f_BE  ;
+            (D.f[DIR_0PP])[ktn  ] += f_TN  ;
+            (D.f[DIR_0MM])[kbs  ] += f_TN  ;
+            (D.f[DIR_0PM])[kbn  ] += f_BN  ;
+            (D.f[DIR_0MP])[kts  ] += f_BN  ;
+            (D.f[DIR_PPP])[ktne ] += f_TNE ;
+            (D.f[DIR_MMM])[kbsw ] += f_TNE ;
+            (D.f[DIR_PPM])[kbne ] += f_TSW ;
+            (D.f[DIR_MMP])[ktsw ] += f_TSW ;
+            (D.f[DIR_PMP])[ktse ] += f_TSE ;
+            (D.f[DIR_MPM])[kbnw ] += f_TSE ;
+            (D.f[DIR_PMM])[kbse ] += f_TNW ;
+            (D.f[DIR_MPP])[ktnw ] += f_TNW ;
 
             //////////////////////////////////////////////////////////////////////////
         }
@@ -460,7 +460,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
 	    {
 		    //////////////////////////////////////////////////////////////////////////
 		    Distributions27 D;
-		    D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		    D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
 		    //////////////////////////////////////////////////////////////////////////
 		    (D.f[DIR_000])[k] = c96o1;
 		    //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
index c091aa8b9a29017ddc0f6ea6584e805d7afc4859..7f67d1692f7e136a6537be6780fe8625adc33e22 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
@@ -47,7 +47,7 @@ __global__ void InitAD27(
 	real* velocityX,
 	real* velocityY,
 	real* velocityZ,
-	uint size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* distributionsAD,
 	bool isEvenTimestep)
 {
@@ -68,7 +68,7 @@ __global__ void InitAD27(
 
 	//////////////////////////////////////////////////////////////////////////
 	// run for all indices in size_Mat and fluid nodes
-	if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID))
+	if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID))
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -77,63 +77,63 @@ __global__ void InitAD27(
 		Distributions27 distAD;
 		if (isEvenTimestep)
 		{
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		else
 		{
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		//////////////////////////////////////////////////////////////////////////
 		//! - Set local velocities and concetration
@@ -178,32 +178,32 @@ __global__ void InitAD27(
 		real cu_sq = c3o2*(vx1*vx1 + vx2*vx2 + vx3*vx3);
 
 		(distAD.f[DIR_000])[kzero] = c8o27  * conc * (c1o1 - cu_sq);
-		(distAD.f[DIR_P00   ])[ke   ] = c2o27  * conc * (c1o1 + c3o1 * ( vx1            ) + c9o2 * ( vx1            ) * ( vx1            ) - cu_sq);
-		(distAD.f[DIR_M00   ])[kw   ] = c2o27  * conc * (c1o1 + c3o1 * (-vx1            ) + c9o2 * (-vx1            ) * (-vx1            ) - cu_sq);
-		(distAD.f[DIR_0P0   ])[kn   ] = c2o27  * conc * (c1o1 + c3o1 * (       vx2      ) + c9o2 * (       vx2      ) * (       vx2      ) - cu_sq);
-		(distAD.f[DIR_0M0   ])[ks   ] = c2o27  * conc * (c1o1 + c3o1 * (     - vx2      ) + c9o2 * (     - vx2      ) * (     - vx2      ) - cu_sq);
-		(distAD.f[DIR_00P   ])[kt   ] = c2o27  * conc * (c1o1 + c3o1 * (             vx3) + c9o2 * (             vx3) * (             vx3) - cu_sq);
-		(distAD.f[DIR_00M   ])[kb   ] = c2o27  * conc * (c1o1 + c3o1 * (           - vx3) + c9o2 * (           - vx3) * (           - vx3) - cu_sq);
-		(distAD.f[DIR_PP0  ])[kne  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 + vx2      ) + c9o2 * ( vx1 + vx2      ) * ( vx1 + vx2      ) - cu_sq);
-		(distAD.f[DIR_MM0  ])[ksw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 - vx2      ) + c9o2 * (-vx1 - vx2      ) * (-vx1 - vx2      ) - cu_sq);
-		(distAD.f[DIR_PM0  ])[kse  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 - vx2      ) + c9o2 * ( vx1 - vx2      ) * ( vx1 - vx2      ) - cu_sq);
-		(distAD.f[DIR_MP0  ])[knw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 + vx2      ) + c9o2 * (-vx1 + vx2      ) * (-vx1 + vx2      ) - cu_sq);
-		(distAD.f[DIR_P0P  ])[kte  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       + vx3) + c9o2 * ( vx1       + vx3) * ( vx1       + vx3) - cu_sq);
-		(distAD.f[DIR_M0M  ])[kbw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       - vx3) + c9o2 * (-vx1       - vx3) * (-vx1       - vx3) - cu_sq);
-		(distAD.f[DIR_P0M  ])[kbe  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       - vx3) + c9o2 * ( vx1       - vx3) * ( vx1       - vx3) - cu_sq);
-		(distAD.f[DIR_M0P  ])[ktw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       + vx3) + c9o2 * (-vx1       + vx3) * (-vx1       + vx3) - cu_sq);
-		(distAD.f[DIR_0PP  ])[ktn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 + vx3) + c9o2 * (       vx2 + vx3) * (       vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_0MM  ])[kbs  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 - vx3) + c9o2 * (     - vx2 - vx3) * (     - vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_0PM  ])[kbn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 - vx3) + c9o2 * (       vx2 - vx3) * (       vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_0MP  ])[kts  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 + vx3) + c9o2 * (     - vx2 + vx3) * (     - vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_PPP ])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_MMM ])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_PPM ])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_MMP ])[ktsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_PMP ])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_MPM ])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_PMM ])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_MPP ])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_P00])[ke   ] = c2o27  * conc * (c1o1 + c3o1 * ( vx1            ) + c9o2 * ( vx1            ) * ( vx1            ) - cu_sq);
+		(distAD.f[DIR_M00])[kw   ] = c2o27  * conc * (c1o1 + c3o1 * (-vx1            ) + c9o2 * (-vx1            ) * (-vx1            ) - cu_sq);
+		(distAD.f[DIR_0P0])[kn   ] = c2o27  * conc * (c1o1 + c3o1 * (       vx2      ) + c9o2 * (       vx2      ) * (       vx2      ) - cu_sq);
+		(distAD.f[DIR_0M0])[ks   ] = c2o27  * conc * (c1o1 + c3o1 * (     - vx2      ) + c9o2 * (     - vx2      ) * (     - vx2      ) - cu_sq);
+		(distAD.f[DIR_00P])[kt   ] = c2o27  * conc * (c1o1 + c3o1 * (             vx3) + c9o2 * (             vx3) * (             vx3) - cu_sq);
+		(distAD.f[DIR_00M])[kb   ] = c2o27  * conc * (c1o1 + c3o1 * (           - vx3) + c9o2 * (           - vx3) * (           - vx3) - cu_sq);
+		(distAD.f[DIR_PP0])[kne  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 + vx2      ) + c9o2 * ( vx1 + vx2      ) * ( vx1 + vx2      ) - cu_sq);
+		(distAD.f[DIR_MM0])[ksw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 - vx2      ) + c9o2 * (-vx1 - vx2      ) * (-vx1 - vx2      ) - cu_sq);
+		(distAD.f[DIR_PM0])[kse  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 - vx2      ) + c9o2 * ( vx1 - vx2      ) * ( vx1 - vx2      ) - cu_sq);
+		(distAD.f[DIR_MP0])[knw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 + vx2      ) + c9o2 * (-vx1 + vx2      ) * (-vx1 + vx2      ) - cu_sq);
+		(distAD.f[DIR_P0P])[kte  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       + vx3) + c9o2 * ( vx1       + vx3) * ( vx1       + vx3) - cu_sq);
+		(distAD.f[DIR_M0M])[kbw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       - vx3) + c9o2 * (-vx1       - vx3) * (-vx1       - vx3) - cu_sq);
+		(distAD.f[DIR_P0M])[kbe  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       - vx3) + c9o2 * ( vx1       - vx3) * ( vx1       - vx3) - cu_sq);
+		(distAD.f[DIR_M0P])[ktw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       + vx3) + c9o2 * (-vx1       + vx3) * (-vx1       + vx3) - cu_sq);
+		(distAD.f[DIR_0PP])[ktn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 + vx3) + c9o2 * (       vx2 + vx3) * (       vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_0MM])[kbs  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 - vx3) + c9o2 * (     - vx2 - vx3) * (     - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_0PM])[kbn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 - vx3) + c9o2 * (       vx2 - vx3) * (       vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_0MP])[kts  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 + vx3) + c9o2 * (     - vx2 + vx3) * (     - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_PPP])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_MMM])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_PPM])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_MMP])[ktsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_PMP])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_MPM])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_PMM])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_MPP])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
 	}
 }
 
@@ -263,63 +263,63 @@ __global__ void InitAD27(
 //          Distributions27 D27;
 //          if (EvenOrOdd==true)
 //          {
-//             D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-//             D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-//             D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-//             D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-//             D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-//             D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-//             D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-//             D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-//             D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-//             D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-//             D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-//             D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-//             D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-//             D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-//             D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-//             D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-//             D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-//             D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-//             D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//             D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-//             D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-//             D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-//             D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-//             D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-//             D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-//             D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-//             D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+//             D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat];
+//             D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat];
+//             D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat];
+//             D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat];
+//             D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat];
+//             D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat];
+//             D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat];
+//             D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat];
+//             D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat];
+//             D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat];
+//             D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat];
+//             D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat];
+//             D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat];
+//             D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat];
+//             D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat];
+//             D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat];
+//             D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat];
+//             D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat];
+//             D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//             D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat];
+//             D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat];
+//             D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat];
+//             D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat];
+//             D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat];
+//             D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat];
+//             D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat];
+//             D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat];
 //          }
 //          else
 //          {
-//             D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-//             D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-//             D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-//             D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-//             D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-//             D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-//             D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-//             D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-//             D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-//             D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-//             D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-//             D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-//             D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-//             D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-//             D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-//             D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-//             D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-//             D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-//             D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//             D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-//             D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-//             D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
-//             D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-//             D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-//             D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-//             D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-//             D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+//             D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat];
+//             D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat];
+//             D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat];
+//             D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat];
+//             D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat];
+//             D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat];
+//             D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat];
+//             D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat];
+//             D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat];
+//             D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat];
+//             D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat];
+//             D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat];
+//             D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat];
+//             D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat];
+//             D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat];
+//             D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat];
+//             D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat];
+//             D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat];
+//             D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//             D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat];
+//             D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat];
+//             D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat];
+//             D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat];
+//             D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat];
+//             D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat];
+//             D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat];
+//             D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat];
 //          }
 //          //////////////////////////////////////////////////////////////////////////
 //          real ConcD = Conc[k];
@@ -391,32 +391,32 @@ __global__ void InitAD27(
 //          real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
 //          (D27.f[DIR_000])[kzero] =   c8o27* ConcD*(c1o1-cu_sq);
-//          (D27.f[DIR_P00   ])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-//          (D27.f[DIR_M00   ])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-//          (D27.f[DIR_0P0   ])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-//          (D27.f[DIR_0M0   ])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-//          (D27.f[DIR_00P   ])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-//          (D27.f[DIR_00M   ])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-//          (D27.f[DIR_PP0  ])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-//          (D27.f[DIR_MM0  ])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-//          (D27.f[DIR_PM0  ])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-//          (D27.f[DIR_MP0  ])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-//          (D27.f[DIR_P0P  ])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-//          (D27.f[DIR_M0M  ])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-//          (D27.f[DIR_P0M  ])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-//          (D27.f[DIR_M0P  ])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-//          (D27.f[DIR_0PP  ])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-//          (D27.f[DIR_0MM  ])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-//          (D27.f[DIR_0PM  ])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-//          (D27.f[DIR_0MP  ])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-//          (D27.f[DIR_PPP ])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-//          (D27.f[DIR_MMM ])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-//          (D27.f[DIR_PPM ])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-//          (D27.f[DIR_MMP ])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-//          (D27.f[DIR_PMP ])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-//          (D27.f[DIR_MPM ])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-//          (D27.f[DIR_PMM ])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-//          (D27.f[DIR_MPP ])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+//          (D27.f[DIR_P00])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+//          (D27.f[DIR_M00])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+//          (D27.f[DIR_0P0])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+//          (D27.f[DIR_0M0])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+//          (D27.f[DIR_00P])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+//          (D27.f[DIR_00M])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+//          (D27.f[DIR_PP0])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+//          (D27.f[DIR_MM0])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+//          (D27.f[DIR_PM0])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+//          (D27.f[DIR_MP0])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+//          (D27.f[DIR_P0P])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+//          (D27.f[DIR_M0M])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+//          (D27.f[DIR_P0M])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+//          (D27.f[DIR_M0P])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+//          (D27.f[DIR_0PP])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+//          (D27.f[DIR_0MM])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+//          (D27.f[DIR_0PM])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+//          (D27.f[DIR_0MP])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+//          (D27.f[DIR_PPP])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+//          (D27.f[DIR_MMM])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+//          (D27.f[DIR_PPM])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+//          (D27.f[DIR_MMP])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+//          (D27.f[DIR_PMP])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+//          (D27.f[DIR_MPM])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+//          (D27.f[DIR_PMM])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+//          (D27.f[DIR_MPP])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 //          ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //       }
 //    }
@@ -448,7 +448,7 @@ __global__ void InitAD7( unsigned int* neighborX,
                                     real* ux,
                                     real* uy,
                                     real* uz,
-                                    unsigned int size_Mat,
+                                    unsigned long long numberOfLBnodes,
                                     real* DD7,
                                     bool EvenOrOdd)
 {
@@ -463,7 +463,7 @@ __global__ void InitAD7( unsigned int* neighborX,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -474,23 +474,23 @@ __global__ void InitAD7( unsigned int* neighborX,
          Distributions7 D7;
          if (EvenOrOdd==true)
          {
-            D7.f[0] = &DD7[0*size_Mat];
-            D7.f[1] = &DD7[1*size_Mat];
-            D7.f[2] = &DD7[2*size_Mat];
-            D7.f[3] = &DD7[3*size_Mat];
-            D7.f[4] = &DD7[4*size_Mat];
-            D7.f[5] = &DD7[5*size_Mat];
-            D7.f[6] = &DD7[6*size_Mat];
+            D7.f[0] = &DD7[0*numberOfLBnodes];
+            D7.f[1] = &DD7[1*numberOfLBnodes];
+            D7.f[2] = &DD7[2*numberOfLBnodes];
+            D7.f[3] = &DD7[3*numberOfLBnodes];
+            D7.f[4] = &DD7[4*numberOfLBnodes];
+            D7.f[5] = &DD7[5*numberOfLBnodes];
+            D7.f[6] = &DD7[6*numberOfLBnodes];
          }
          else
          {
-            D7.f[0] = &DD7[0*size_Mat];
-            D7.f[2] = &DD7[1*size_Mat];
-            D7.f[1] = &DD7[2*size_Mat];
-            D7.f[4] = &DD7[3*size_Mat];
-            D7.f[3] = &DD7[4*size_Mat];
-            D7.f[6] = &DD7[5*size_Mat];
-            D7.f[5] = &DD7[6*size_Mat];
+            D7.f[0] = &DD7[0*numberOfLBnodes];
+            D7.f[2] = &DD7[1*numberOfLBnodes];
+            D7.f[1] = &DD7[2*numberOfLBnodes];
+            D7.f[4] = &DD7[3*numberOfLBnodes];
+            D7.f[3] = &DD7[4*numberOfLBnodes];
+            D7.f[6] = &DD7[5*numberOfLBnodes];
+            D7.f[5] = &DD7[6*numberOfLBnodes];
          }
          //////////////////////////////////////////////////////////////////////////
          real ConcD = Conc[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
deleted file mode 100644
index 2f6a11aa17398b65858508c3f94b241c16551b37..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
+++ /dev/null
@@ -1,177 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file KernelUtilities.h
-//! \ingroup GPU
-//! \author Martin Schoenherr, Anna Wellmann
-//======================================================================================
-#ifndef KERNELUTILS_H
-#define KERNELUTILS_H
-
-#include "LBM/LB.h"
-#include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
-
-using namespace vf::lbm::constant;
-using namespace vf::lbm::dir;
-
-__inline__ __device__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const uint numberOfLBnodes, const bool isEvenTimestep)
-{
-    if (isEvenTimestep)
-    {
-        dist.f[DIR_P00   ] = &distributionArray[DIR_P00   *numberOfLBnodes];
-        dist.f[DIR_M00   ] = &distributionArray[DIR_M00   *numberOfLBnodes];
-        dist.f[DIR_0P0   ] = &distributionArray[DIR_0P0   *numberOfLBnodes];
-        dist.f[DIR_0M0   ] = &distributionArray[DIR_0M0   *numberOfLBnodes];
-        dist.f[DIR_00P   ] = &distributionArray[DIR_00P   *numberOfLBnodes];
-        dist.f[DIR_00M   ] = &distributionArray[DIR_00M   *numberOfLBnodes];
-        dist.f[DIR_PP0  ] = &distributionArray[DIR_PP0  *numberOfLBnodes];
-        dist.f[DIR_MM0  ] = &distributionArray[DIR_MM0  *numberOfLBnodes];
-        dist.f[DIR_PM0  ] = &distributionArray[DIR_PM0  *numberOfLBnodes];
-        dist.f[DIR_MP0  ] = &distributionArray[DIR_MP0  *numberOfLBnodes];
-        dist.f[DIR_P0P  ] = &distributionArray[DIR_P0P  *numberOfLBnodes];
-        dist.f[DIR_M0M  ] = &distributionArray[DIR_M0M  *numberOfLBnodes];
-        dist.f[DIR_P0M  ] = &distributionArray[DIR_P0M  *numberOfLBnodes];
-        dist.f[DIR_M0P  ] = &distributionArray[DIR_M0P  *numberOfLBnodes];
-        dist.f[DIR_0PP  ] = &distributionArray[DIR_0PP  *numberOfLBnodes];
-        dist.f[DIR_0MM  ] = &distributionArray[DIR_0MM  *numberOfLBnodes];
-        dist.f[DIR_0PM  ] = &distributionArray[DIR_0PM  *numberOfLBnodes];
-        dist.f[DIR_0MP  ] = &distributionArray[DIR_0MP  *numberOfLBnodes];
-        dist.f[DIR_000] = &distributionArray[DIR_000*numberOfLBnodes];
-        dist.f[DIR_PPP ] = &distributionArray[DIR_PPP *numberOfLBnodes];
-        dist.f[DIR_MMP ] = &distributionArray[DIR_MMP *numberOfLBnodes];
-        dist.f[DIR_PMP ] = &distributionArray[DIR_PMP *numberOfLBnodes];
-        dist.f[DIR_MPP ] = &distributionArray[DIR_MPP *numberOfLBnodes];
-        dist.f[DIR_PPM ] = &distributionArray[DIR_PPM *numberOfLBnodes];
-        dist.f[DIR_MMM ] = &distributionArray[DIR_MMM *numberOfLBnodes];
-        dist.f[DIR_PMM ] = &distributionArray[DIR_PMM *numberOfLBnodes];
-        dist.f[DIR_MPM ] = &distributionArray[DIR_MPM *numberOfLBnodes];
-    }
-    else
-    {
-         dist.f[DIR_M00   ] = &distributionArray[DIR_P00   *numberOfLBnodes];
-         dist.f[DIR_P00   ] = &distributionArray[DIR_M00   *numberOfLBnodes];
-         dist.f[DIR_0M0   ] = &distributionArray[DIR_0P0   *numberOfLBnodes];
-         dist.f[DIR_0P0   ] = &distributionArray[DIR_0M0   *numberOfLBnodes];
-         dist.f[DIR_00M   ] = &distributionArray[DIR_00P   *numberOfLBnodes];
-         dist.f[DIR_00P   ] = &distributionArray[DIR_00M   *numberOfLBnodes];
-         dist.f[DIR_MM0  ] = &distributionArray[DIR_PP0  *numberOfLBnodes];
-         dist.f[DIR_PP0  ] = &distributionArray[DIR_MM0  *numberOfLBnodes];
-         dist.f[DIR_MP0  ] = &distributionArray[DIR_PM0  *numberOfLBnodes];
-         dist.f[DIR_PM0  ] = &distributionArray[DIR_MP0  *numberOfLBnodes];
-         dist.f[DIR_M0M  ] = &distributionArray[DIR_P0P  *numberOfLBnodes];
-         dist.f[DIR_P0P  ] = &distributionArray[DIR_M0M  *numberOfLBnodes];
-         dist.f[DIR_M0P  ] = &distributionArray[DIR_P0M  *numberOfLBnodes];
-         dist.f[DIR_P0M  ] = &distributionArray[DIR_M0P  *numberOfLBnodes];
-         dist.f[DIR_0MM  ] = &distributionArray[DIR_0PP  *numberOfLBnodes];
-         dist.f[DIR_0PP  ] = &distributionArray[DIR_0MM  *numberOfLBnodes];
-         dist.f[DIR_0MP  ] = &distributionArray[DIR_0PM  *numberOfLBnodes];
-         dist.f[DIR_0PM  ] = &distributionArray[DIR_0MP  *numberOfLBnodes];
-         dist.f[DIR_000] = &distributionArray[DIR_000*numberOfLBnodes];
-         dist.f[DIR_PPP ] = &distributionArray[DIR_MMM *numberOfLBnodes];
-         dist.f[DIR_MMP ] = &distributionArray[DIR_PPM *numberOfLBnodes];
-         dist.f[DIR_PMP ] = &distributionArray[DIR_MPM *numberOfLBnodes];
-         dist.f[DIR_MPP ] = &distributionArray[DIR_PMM *numberOfLBnodes];
-         dist.f[DIR_PPM ] = &distributionArray[DIR_MMP *numberOfLBnodes];
-         dist.f[DIR_MMM ] = &distributionArray[DIR_PPP *numberOfLBnodes];
-         dist.f[DIR_PMM ] = &distributionArray[DIR_MPP *numberOfLBnodes];
-         dist.f[DIR_MPM ] = &distributionArray[DIR_PMP *numberOfLBnodes];
-    }
-}
-
-__inline__ __device__ void getPointersToSubgridDistances(SubgridDistances27& subgridD, real* subgridDistances, const unsigned int numberOfSubgridIndices)
-{
-    subgridD.q[DIR_P00   ] = &subgridDistances[DIR_P00    *numberOfSubgridIndices];
-    subgridD.q[DIR_M00   ] = &subgridDistances[DIR_M00    *numberOfSubgridIndices];
-    subgridD.q[DIR_0P0   ] = &subgridDistances[DIR_0P0    *numberOfSubgridIndices];
-    subgridD.q[DIR_0M0   ] = &subgridDistances[DIR_0M0    *numberOfSubgridIndices];
-    subgridD.q[DIR_00P   ] = &subgridDistances[DIR_00P    *numberOfSubgridIndices];
-    subgridD.q[DIR_00M   ] = &subgridDistances[DIR_00M    *numberOfSubgridIndices];
-    subgridD.q[DIR_PP0  ] = &subgridDistances[DIR_PP0   *numberOfSubgridIndices];
-    subgridD.q[DIR_MM0  ] = &subgridDistances[DIR_MM0   *numberOfSubgridIndices];
-    subgridD.q[DIR_PM0  ] = &subgridDistances[DIR_PM0   *numberOfSubgridIndices];
-    subgridD.q[DIR_MP0  ] = &subgridDistances[DIR_MP0   *numberOfSubgridIndices];
-    subgridD.q[DIR_P0P  ] = &subgridDistances[DIR_P0P   *numberOfSubgridIndices];
-    subgridD.q[DIR_M0M  ] = &subgridDistances[DIR_M0M   *numberOfSubgridIndices];
-    subgridD.q[DIR_P0M  ] = &subgridDistances[DIR_P0M   *numberOfSubgridIndices];
-    subgridD.q[DIR_M0P  ] = &subgridDistances[DIR_M0P   *numberOfSubgridIndices];
-    subgridD.q[DIR_0PP  ] = &subgridDistances[DIR_0PP   *numberOfSubgridIndices];
-    subgridD.q[DIR_0MM  ] = &subgridDistances[DIR_0MM   *numberOfSubgridIndices];
-    subgridD.q[DIR_0PM  ] = &subgridDistances[DIR_0PM   *numberOfSubgridIndices];
-    subgridD.q[DIR_0MP  ] = &subgridDistances[DIR_0MP   *numberOfSubgridIndices];
-    subgridD.q[DIR_000] = &subgridDistances[DIR_000 *numberOfSubgridIndices];
-    subgridD.q[DIR_PPP ] = &subgridDistances[DIR_PPP  *numberOfSubgridIndices];
-    subgridD.q[DIR_MMP ] = &subgridDistances[DIR_MMP  *numberOfSubgridIndices];
-    subgridD.q[DIR_PMP ] = &subgridDistances[DIR_PMP  *numberOfSubgridIndices];
-    subgridD.q[DIR_MPP ] = &subgridDistances[DIR_MPP  *numberOfSubgridIndices];
-    subgridD.q[DIR_PPM ] = &subgridDistances[DIR_PPM  *numberOfSubgridIndices];
-    subgridD.q[DIR_MMM ] = &subgridDistances[DIR_MMM  *numberOfSubgridIndices];
-    subgridD.q[DIR_PMM ] = &subgridDistances[DIR_PMM  *numberOfSubgridIndices];
-    subgridD.q[DIR_MPM ] = &subgridDistances[DIR_MPM  *numberOfSubgridIndices];
-}
-
-__inline__ __device__ real getEquilibriumForBC(const real& drho, const real& velocity, const real& cu_sq, const real weight)
-{
-    return weight * (drho + c9o2 * velocity * velocity * (c1o1 + drho) - cu_sq);
-}
-
-__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const real& feq, 
-                                                                const real& omega, const real& velocity, const real weight)
-{
-
-    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
-           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q);
-}
-
-__inline__ __device__ real getBounceBackDistributionForVeloBC(  const real& f, 
-                                                                const real& velocity, const real weight)
-{
-
-    return f - (c6o1 * weight * velocity);
-}
-
-__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, 
-                                                                  const real& omega)
-{
-
-    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
-           + (q * (f + fInverse)) / (c1o1 + q);
-}
-
-
-__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, 
-                                                                            const real& omega, const real& drho, const real& velocity, const real weight)
-{
-
-    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
-           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho;
-}
-
-
-
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
index 51368bbe09e6fc43a7a1ff6b8b15387417774964..b05cb9201ce30038bd6edf52e2e95a13c6f6d7d4 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
@@ -14,7 +14,7 @@
 
 #include <iomanip>
 
-//#include "Core/Logger/Logger.h"
+#include "cuda/CudaGrid.h"
 
 #include "Parameter/Parameter.h"
 // includes, kernels
@@ -24,7 +24,7 @@
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
-__global__                 void kineticEnergyKernel  (real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint size_Mat);
+__global__                 void kineticEnergyKernel  (real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, unsigned long long numberOfLBnodes);
 
 __host__ __device__ inline void kineticEnergyFunction(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint index);
 
@@ -35,56 +35,42 @@ bool KineticEnergyAnalyzer::run(uint iter)
     if( iter % this->analyzeIter != 0 ) return false;
 
 	int lev = 0;
-	int size_Mat = this->para->getParD(lev)->numberOfNodes;
-
-    thrust::device_vector<real> kineticEnergy(size_Mat, c0o1);
-    thrust::device_vector<uint> isFluid      (size_Mat, 0);
-
-	unsigned int numberOfThreads = 128;
-    int Grid = (size_Mat / numberOfThreads)+1;
-    int Grid1, Grid2;
-    if (Grid>512)
-    {
-       Grid1 = 512;
-       Grid2 = (Grid/Grid1)+1;
-    } 
-    else
-    {
-       Grid1 = 1;
-       Grid2 = Grid;
-    }
-    dim3 grid(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1 );
-
-    LBCalcMacCompSP27<<< grid, threads >>> (para->getParD(lev)->velocityX,
-											para->getParD(lev)->velocityY,
-											para->getParD(lev)->velocityZ,
-											para->getParD(lev)->rho,
-											para->getParD(lev)->pressure,
-											para->getParD(lev)->typeOfGridNode,
-											para->getParD(lev)->neighborX,
-											para->getParD(lev)->neighborY,
-											para->getParD(lev)->neighborZ,
-											para->getParD(lev)->numberOfNodes,
-											para->getParD(lev)->distributions.f[0],
-											para->getParD(lev)->isEvenTimestep); 
-    getLastCudaError("LBCalcMacSP27 execution failed"); 
-
-	kineticEnergyKernel <<< grid, threads >>> ( para->getParD(lev)->velocityX, 
-											    para->getParD(lev)->velocityY, 
-												para->getParD(lev)->velocityZ, 
-												para->getParD(lev)->rho, 
-											    para->getParD(lev)->neighborX,
-											    para->getParD(lev)->neighborY,
-											    para->getParD(lev)->neighborZ,
-											    para->getParD(lev)->neighborInverse,
-											    para->getParD(lev)->typeOfGridNode,
-												kineticEnergy.data().get(), 
-                                                isFluid.data().get(),
-												size_Mat);
-	cudaDeviceSynchronize();
-
-	 getLastCudaError("kineticEnergyKernel execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(lev)->numberofthreads, para->getParD(lev)->numberOfNodes);
+
+    thrust::device_vector<real> kineticEnergy( this->para->getParD(lev)->numberOfNodes, c0o1);
+    thrust::device_vector<uint> isFluid      ( this->para->getParD(lev)->numberOfNodes, 0);
+
+    LBCalcMacCompSP27<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX,
+        para->getParD(lev)->velocityY,
+        para->getParD(lev)->velocityZ,
+        para->getParD(lev)->rho,
+        para->getParD(lev)->pressure,
+        para->getParD(lev)->typeOfGridNode,
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->numberOfNodes,
+        para->getParD(lev)->distributions.f[0],
+        para->getParD(lev)->isEvenTimestep); 
+    getLastCudaError("LBCalcMacCompSP27 execution failed"); 
+
+    kineticEnergyKernel<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX, 
+        para->getParD(lev)->velocityY, 
+        para->getParD(lev)->velocityZ, 
+        para->getParD(lev)->rho, 
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->neighborInverse,
+        para->getParD(lev)->typeOfGridNode,
+        kineticEnergy.data().get(), 
+        isFluid.data().get(),
+        para->getParD(lev)->numberOfNodes);
+    cudaDeviceSynchronize();
+
+    getLastCudaError("kineticEnergyKernel execution failed");
 
 	 real EKin               = thrust::reduce(kineticEnergy.begin(), kineticEnergy.end(), c0o1, thrust::plus<real>());
      uint numberOfFluidNodes = thrust::reduce(isFluid.begin(),       isFluid.end(),       0,    thrust::plus<uint>());
@@ -99,7 +85,7 @@ bool KineticEnergyAnalyzer::run(uint iter)
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-__global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint size_Mat)
+__global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, unsigned long long numberOfLBnodes)
 {
     //////////////////////////////////////////////////////////////////////////
     const uint x = threadIdx.x;  // Globaler x-Index 
@@ -115,7 +101,7 @@ __global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uin
 
     //if( index % 34 == 0 || index % 34 == 33 ) return;
 
-    if( index >= size_Mat) return;
+    if( index >= (uint)numberOfLBnodes) return;
 
 	unsigned int BC;
 	BC = geo[index];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 63fc5be0ebe5d4a26d4662ee8c0dddbc3098247a..4faea21102b6a68dd9a0aa30e9cecc7eba6051b0 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -18,2176 +18,1644 @@
 
 #include "Parameter/Parameter.h"
 //////////////////////////////////////////////////////////////////////////
-void KernelCas27( unsigned int grid_nx,
-                             unsigned int grid_ny,
-                             unsigned int grid_nz,
-                             real s9,
-                             unsigned int* bcMatD,
-                             unsigned int* neighborX,
-                             unsigned int* neighborY,
-                             unsigned int* neighborZ,
-                             real* DD,
-                             int size_Mat,
-                             bool EvenOrOdd)
-{
-   dim3 threads       ( grid_nx, 1, 1 );
-   dim3 grid          ( grid_ny, grid_nz );   // Gitter fuer Kollision und Propagation
-
-      LB_Kernel_Casc27<<< grid, threads >>>( s9,
-                                             bcMatD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             DD,
-                                             size_Mat,
-                                             EvenOrOdd);
-     getLastCudaError("LB_Kernel_Casc27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCasSP27( unsigned int numberOfThreads,
-                               real s9,
-                               unsigned int* bcMatD,
-                               unsigned int* neighborX,
-                               unsigned int* neighborY,
-                               unsigned int* neighborZ,
-                               real* DD,
-                               int size_Mat,
-                               bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LB_Kernel_Casc_SP_27<<< grid, threads >>>(s9,
-                                                bcMatD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                DD,
-                                                size_Mat,
-                                                EvenOrOdd);
-      getLastCudaError("LB_Kernel_Casc_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCasSPMS27( unsigned int numberOfThreads,
-                                 real s9,
-                                 unsigned int* bcMatD,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 real* DD,
-                                 int size_Mat,
-                                 bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LB_Kernel_Casc_SP_MS_27<<< grid, threads >>>(s9,
-                                                   bcMatD,
-                                                   neighborX,
-                                                   neighborY,
-                                                   neighborZ,
-                                                   DD,
-                                                   size_Mat,
-                                                   EvenOrOdd);
-      getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCasSPMSOHM27( unsigned int numberOfThreads,
-                                    real s9,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* DD,
-                                    int size_Mat,
-                                    bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LB_Kernel_Casc_SP_MS_OHM_27<<< grid, threads >>>(  s9,
-                                                         bcMatD,
-                                                         neighborX,
-                                                         neighborY,
-                                                         neighborZ,
-                                                         DD,
-                                                         size_Mat,
-                                                         EvenOrOdd);
-      getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed");
+void KernelCas27(
+    unsigned int grid_nx,
+    unsigned int grid_ny,
+    unsigned int grid_nz,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    dim3 threads       ( grid_nx, 1, 1 );
+    dim3 grid          ( grid_ny, grid_nz );   // Gitter fuer Kollision und Propagation
+
+    LB_Kernel_Casc27<<< grid, threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Casc27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelCasSP27( 
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Casc_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Casc_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelCasSPMS27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Casc_SP_MS_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelCasSPMSOHM27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Casc_SP_MS_OHM_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelKumCompSRTSP27(
-	unsigned int numberOfThreads,
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid, threads >>>(
-	   omega,
-	   bcMatD,
-	   neighborX,
-	   neighborY,
-	   neighborZ,
-	   DDStart,
-	   size_Mat,
-	   level,
-	   forces,
-	   EvenOrOdd);
-      getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKum1hSP27(    unsigned int numberOfThreads,
-									real omega,
-									real deltaPhi,
-									real angularVelocity,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* coordX,
-									real* coordY,
-									real* coordZ,
-									real* DDStart,
-									int size_Mat,
-									bool EvenOrOdd)
-{
-	int Grid = (size_Mat / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-		LB_Kernel_Kum_1h_SP_27<<< grid, threads >>>(omega,
-													deltaPhi,
-													angularVelocity,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													coordX,
-													coordY,
-													coordZ,
-													DDStart,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCascadeSP27(  unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									bool EvenOrOdd)
-{
-	int Grid = (size_Mat / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-		LB_Kernel_Cascade_SP_27<<< grid, threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumNewSP27(   unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									bool EvenOrOdd)
-{
-	int Grid = (size_Mat / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-		LB_Kernel_Kum_New_SP_27<<< grid, threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumNewCompSP27(unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									int size_Array,
-									int level,
-									real* forces,
-									bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2, 1);
-	dim3 threads(numberOfThreads, 1, 1);
-
-		//LB_Kernel_Kum_New_Comp_SP_27<<< grid, threads >>>(	s9,
-		//													bcMatD,
-		//													neighborX,
-		//													neighborY,
-		//													neighborZ,
-		//													DD,
-		//													size_Mat,
-		//													level,
-		//													forces,
-		//													EvenOrOdd);
-		//getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed");
-}
-
-//////////////////////////////////////////////////////////////////////////
-void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
-																	real s9,
-																	unsigned int* bcMatD,
-																	unsigned int* neighborX,
-																	unsigned int* neighborY,
-																	unsigned int* neighborZ,
-																	real* DD,
-																	int size_Mat,
-																	int size_Array,
-																	int level,
-																	real* forces,
-																	bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2, 1);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid, threads >>>(	s9,
-																						bcMatD,
-																						neighborX,
-																						neighborY,
-																						neighborZ,
-																						DD,
-																						size_Mat,
-																						level,
-																						forces,
-																						EvenOrOdd);
-		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThreads,
-														real s9,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DD,
-														int size_Mat,
-														int size_Array,
-														int level,
-														real* forces,
-														bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2, 1);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid, threads >>>(	s9,
-																		bcMatD,
-																		neighborX,
-																		neighborY,
-																		neighborZ,
-																		DD,
-																		size_Mat,
-																		level,
-																		forces,
-																		EvenOrOdd);
-		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CumulantOneChimCompSP27(unsigned int numberOfThreads,
-										real s9,
-										unsigned int* bcMatD,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										real* DD,
-										int size_Mat,
-										int size_Array,
-										int level,
-										real* forces,
-										bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2, 1);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	Cumulant_One_chim_Comp_SP_27 <<< grid, threads >>>(	s9,
-														bcMatD,
-														neighborX,
-														neighborY,
-														neighborZ,
-														DD,
-														size_Mat,
-														level,
-														forces,
-														EvenOrOdd);
-		getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumIsoTestSP27(unsigned int numberOfThreads,
-									 real s9,
-									 unsigned int* bcMatD,
-									 unsigned int* neighborX,
-									 unsigned int* neighborY,
-									 unsigned int* neighborZ,
-									 real* DD,
-									 real* dxxUx,
-									 real* dyyUy,
-									 real* dzzUz,
-									 int size_Mat,
-									 bool EvenOrOdd)
-{
-	int Grid = (size_Mat / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	LB_Kernel_Kum_IsoTest_SP_27<<< grid, threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													dxxUx,
-													dyyUy,
-													dzzUz,
-													size_Mat,
-													EvenOrOdd);
-	getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumCompSP27(  unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									bool EvenOrOdd)
-{
-	int Grid = (size_Mat / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-		LB_Kernel_Kum_Comp_SP_27<<< grid, threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
-									   real omega,
-									   unsigned int* neighborX,
-									   unsigned int* neighborY,
-									   unsigned int* neighborZ,
-									   real* DD,
-									   int size_Mat,
-									   int level,
-									   real* forces,
-									   real porosity,
-									   real darcy,
-									   real forchheimer,
-									   unsigned int sizeOfPorousMedia,
-									   unsigned int* nodeIdsPorousMedia,
-									   bool EvenOrOdd)
-{
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2, 1);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid, threads >>>(omega,
-														  neighborX,
-														  neighborY,
-														  neighborZ,
-														  DD,
-														  size_Mat,
-														  level,
-														  forces,
-														  porosity,
-														  darcy,
-														  forchheimer,
-														  sizeOfPorousMedia,
-														  nodeIdsPorousMedia,
-														  EvenOrOdd);
-	getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed");
+    unsigned int numberOfThreads,
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid.grid, grid.threads >>>(
+        omega,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DDStart,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+        getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelKum1hSP27(
+    unsigned int numberOfThreads,
+    real omega,
+    real deltaPhi,
+    real angularVelocity,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    real* DDStart,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Kum_1h_SP_27<<< grid.grid, grid.threads >>>(
+        omega,
+        deltaPhi,
+        angularVelocity,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        coordX,
+        coordY,
+        coordZ,
+        DDStart,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_1h_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelCascadeSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Cascade_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelKumNewSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+
+    LB_Kernel_Kum_New_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelKumNewCompSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    //LB_Kernel_Kum_New_Comp_SP_27<<< grid.grid, grid.threads >>>(	s9,
+    //													bcMatD,
+    //													neighborX,
+    //													neighborY,
+    //													neighborZ,
+    //													DD,
+    //													numberOfLBnodes,
+    //													level,
+    //													forces,
+    //													EvenOrOdd);
+    //getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed");
+}
+
+//////////////////////////////////////////////////////////////////////////
+void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CumulantOnePreconditionedChimCompSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CumulantOneChimCompSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    Cumulant_One_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelKumIsoTestSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    real* dxxUx,
+    real* dyyUy,
+    real* dzzUz,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Kum_IsoTest_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        dxxUx,
+        dyyUy,
+        dzzUz,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelKumCompSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+
+    LB_Kernel_Kum_Comp_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelPMCumOneCompSP27(
+    unsigned int numberOfThreads,
+    real omega,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    real porosity,
+    real darcy,
+    real forchheimer,
+    unsigned int sizeOfPorousMedia,
+    unsigned int* nodeIdsPorousMedia,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        omega,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        porosity,
+        darcy,
+        forchheimer,
+        sizeOfPorousMedia,
+        nodeIdsPorousMedia,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelWaleBySoniMalavCumAA2016CompSP27(
-	unsigned int numberOfThreads,
-	real s9,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int* neighborWSB,
-	real* veloX,
-	real* veloY,
-	real* veloZ,
-	real* DD,
-	real* turbulentViscosity,
-	int size_Mat,
-	int size_Array,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2, 1);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 << < grid, threads >> >(
-		s9,
-		bcMatD,
-		neighborX,
-		neighborY,
-		neighborZ,
-		neighborWSB,
-		veloX,
-		veloY,
-		veloZ,
-		DD,
-		turbulentViscosity,
-		size_Mat,
-		level,
-		forces,
-		EvenOrOdd);
-	getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelADincomp7(   unsigned int numberOfThreads,
-								   real diffusivity,
-								   unsigned int* bcMatD,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   real* DD,
-								   real* DD7,
-								   int size_Mat,
-								   bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LB_Kernel_AD_Incomp_7<<< grid, threads >>>( diffusivity,
-												  bcMatD,
-												  neighborX,
-												  neighborY,
-												  neighborZ,
-												  DD,
-												  DD7,
-												  size_Mat,
-												  EvenOrOdd);
-      getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelADincomp27( unsigned int numberOfThreads,
-								  real diffusivity,
-								  unsigned int* bcMatD,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  real* DD,
-								  real* DD27,
-								  int size_Mat,
-								  bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LB_Kernel_AD_Incomp_27<<< grid, threads >>>( diffusivity,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													DD27,
-													size_Mat,
-													EvenOrOdd);
-      getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void Init27( int myid,
-                        int numprocs,
-                        real u0,
-                        unsigned int* geoD,
-                        unsigned int* neighborX,
-                        unsigned int* neighborY,
-                        unsigned int* neighborZ,
-                        real* vParab,
-                        unsigned int size_Mat,
-                        unsigned int grid_nx,
-                        unsigned int grid_ny,
-                        unsigned int grid_nz,
-                        real* DD,
-                        int level,
-                        int maxlevel)
-{
-   dim3 threads       ( grid_nx, 1, 1 );
-   dim3 grid          ( grid_ny, grid_nz );   // Gitter fuer Kollision und Propagation
-
-      LBInit27<<< grid, threads >>> (  myid,
-                                       numprocs,
-                                       u0,
-                                       geoD,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       vParab,
-                                       size_Mat,
-                                       grid_nx,
-                                       grid_ny,
-                                       grid_nz,
-                                       DD,
-                                       level,
-                                       maxlevel);
-      getLastCudaError("LBInit27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitNonEqPartSP27( unsigned int numberOfThreads,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int* neighborWSB,
-                                   unsigned int* geoD,
-                                   real* rho,
-                                   real* ux,
-                                   real* uy,
-                                   real* uz,
-                                   unsigned int size_Mat,
-                                   real* DD,
-                                   real omega,
-                                   bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBInitNonEqPartSP27<<< grid, threads >>>( neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                neighborWSB,
-                                                geoD,
-                                                rho,
-                                                ux,
-                                                uy,
-                                                uz,
-                                                size_Mat,
-                                                DD,
-                                                omega,
-                                                EvenOrOdd);
-      getLastCudaError("LBInitNonEqPartSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitThS7(     unsigned int numberOfThreads,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int* geoD,
-                              real* Conc,
-                              real* ux,
-                              real* uy,
-                              real* uz,
-                              unsigned int size_Mat,
-                              real* DD7,
-                              bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      InitAD7<<< grid, threads >>>( neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       geoD,
-                                       Conc,
-                                       ux,
-                                       uy,
-                                       uz,
-                                       size_Mat,
-                                       DD7,
-                                       EvenOrOdd);
-      getLastCudaError("InitAD7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitADDev27( unsigned int numberOfThreads,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int* geoD,
-                           real* Conc,
-                           real* ux,
-                           real* uy,
-                           real* uz,
-                           unsigned int size_Mat,
-                           real* DD27,
-                           bool EvenOrOdd)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      InitAD27<<< grid, threads >>>(neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       geoD,
-                                       Conc,
-                                       ux,
-                                       uy,
-                                       uz,
-                                       size_Mat,
-                                       DD27,
-                                       EvenOrOdd);
-      getLastCudaError("InitAD27 execution failed");
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    real* veloX,
+    real* veloY,
+    real* veloZ,
+    real* DD,
+    real* turbulentViscosity,
+    unsigned long long numberOfLBnodes,
+    int size_Array,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        veloX,
+        veloY,
+        veloZ,
+        DD,
+        turbulentViscosity,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelADincomp7( // host-side wrapper: launches LB_Kernel_AD_Incomp_7
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    real diffusivity,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    real* DD7,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>( // remaining parameters are passed through unchanged
+        diffusivity,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        DD7,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelADincomp27( // host-side wrapper: launches LB_Kernel_AD_Incomp_27
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    real diffusivity,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    real* DD27,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>( // remaining parameters are passed through unchanged
+        diffusivity,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        DD27,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void Init27( // host-side wrapper: launches LBInit27 with a launch shape taken from grid_nx / grid_ny / grid_nz
+    int myid,
+    int numprocs,
+    real u0,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* vParab,
+    unsigned long long numberOfLBnodes, // forwarded to the kernel; does not influence the launch shape here
+    unsigned int grid_nx, // block size in x (threads per block)
+    unsigned int grid_ny, // number of blocks in x
+    unsigned int grid_nz, // number of blocks in y
+    real* DD,
+    int level,
+    int maxlevel)
+{
+    dim3 threads       ( grid_nx, 1, 1 ); // block dimensions: grid_nx x 1 x 1
+    dim3 grid          ( grid_ny, grid_nz ); // grid dimensions: grid_ny x grid_nz blocks
+
+    LBInit27<<< grid, threads >>> ( // all parameters, including the three extents, are passed through unchanged
+        myid,
+        numprocs,
+        u0,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        vParab,
+        numberOfLBnodes,
+        grid_nx,
+        grid_ny,
+        grid_nz,
+        DD,
+        level,
+        maxlevel);
+    getLastCudaError("LBInit27 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void InitNonEqPartSP27( // host-side wrapper: launches LBInitNonEqPartSP27
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    unsigned int* geoD,
+    real* rho,
+    real* ux,
+    real* uy,
+    real* uz,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    real* DD,
+    real omega,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LBInitNonEqPartSP27<<< grid.grid, grid.threads >>>( // remaining parameters are passed through unchanged
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        geoD,
+        rho,
+        ux,
+        uy,
+        uz,
+        numberOfLBnodes,
+        DD,
+        omega,
+        EvenOrOdd);
+    getLastCudaError("LBInitNonEqPartSP27 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void InitThS7( // host-side wrapper: launches the InitAD7 kernel (kernel name differs from the wrapper name)
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* geoD,
+    real* Conc,
+    real* ux,
+    real* uy,
+    real* uz,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    real* DD7,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    InitAD7<<< grid.grid, grid.threads >>>( // remaining parameters are passed through unchanged
+        neighborX,
+        neighborY,
+        neighborZ,
+        geoD,
+        Conc,
+        ux,
+        uy,
+        uz,
+        numberOfLBnodes,
+        DD7,
+        EvenOrOdd);
+    getLastCudaError("InitAD7 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void InitADDev27( // host-side wrapper: launches the InitAD27 kernel
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* geoD,
+    real* Conc,
+    real* ux,
+    real* uy,
+    real* uz,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    real* DD27,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    InitAD27<<< grid.grid, grid.threads >>>( // remaining parameters are passed through unchanged
+        neighborX,
+        neighborY,
+        neighborZ,
+        geoD,
+        Conc,
+        ux,
+        uy,
+        uz,
+        numberOfLBnodes,
+        DD27,
+        EvenOrOdd);
+    getLastCudaError("InitAD27 execution failed"); // error check directly after the launch, tagged with the kernel name
 }
 //////////////////////////////////////////////////////////////////////////
 void PostProcessorF3_2018Fehlberg(
-	unsigned int numberOfThreads,
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* rhoOut,
-	real* vxOut,
-	real* vyOut,
-	real* vzOut,
-	real* DDStart,
-	real* G6,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
-{
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	  LB_PostProcessor_F3_2018_Fehlberg <<< grid, threads >>> (   omega,
-																  bcMatD,
-																  neighborX,
-																  neighborY,
-																  neighborZ,
-																  rhoOut,
-																  vxOut,
-																  vyOut,
-																  vzOut,
-																  DDStart,
-																  G6,
-																  size_Mat,
-																  level,
-																  forces,
-																  EvenOrOdd);
-      getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMac27( real* vxD,
-                           real* vyD,
-                           real* vzD,
-                           real* rhoD,
-                           unsigned int* geoD,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int size_Mat,
-                           unsigned int grid_nx,
-                           unsigned int grid_ny,
-                           unsigned int grid_nz,
-                           real* DD,
-                           bool isEvenTimestep)
+    unsigned int numberOfThreads,
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rhoOut,
+    real* vxOut,
+    real* vyOut,
+    real* vzOut,
+    real* DDStart,
+    real* G6,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_PostProcessor_F3_2018_Fehlberg <<< grid.grid, grid.threads >>> (
+        omega,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        rhoOut,
+        vxOut,
+        vyOut,
+        vzOut,
+        DDStart,
+        G6,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMac27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int grid_nx,
+    unsigned int grid_ny,
+    unsigned int grid_nz,
+    real* DD,
+    bool isEvenTimestep)
 {
    dim3 threads       ( grid_nx, 1, 1 );
    dim3 grid          ( grid_ny, grid_nz );
 
-      LBCalcMac27<<< grid, threads >>> (  vxD,
-                                          vyD,
-                                          vzD,
-                                          rhoD,
-                                          geoD,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          DD,
-                                          isEvenTimestep);
-      getLastCudaError("LBCalcMac27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacSP27( real* vxD,
-                             real* vyD,
-                             real* vzD,
-                             real* rhoD,
-                             real* pressD,
-                             unsigned int* geoD,
-                             unsigned int* neighborX,
-                             unsigned int* neighborY,
-                             unsigned int* neighborZ,
-                             unsigned int size_Mat,
-                             unsigned int numberOfThreads,
-                             real* DD,
-                             bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcMacSP27<<< grid, threads >>> (   vxD,
-                                             vyD,
-                                             vzD,
-                                             rhoD,
-                                             pressD,
-                                             geoD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             DD,
-                                             isEvenTimestep);
-      getLastCudaError("LBCalcMacSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacCompSP27( real* vxD,
-								 real* vyD,
-								 real* vzD,
-								 real* rhoD,
-								 real* pressD,
-								 unsigned int* geoD,
-								 unsigned int* neighborX,
-								 unsigned int* neighborY,
-								 unsigned int* neighborZ,
-								 unsigned int size_Mat,
-								 unsigned int numberOfThreads,
-								 real* DD,
-								 bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcMacCompSP27<<< grid, threads >>> (   vxD,
-												 vyD,
-												 vzD,
-												 rhoD,
-												 pressD,
-												 geoD,
-												 neighborX,
-												 neighborY,
-												 neighborZ,
-												 size_Mat,
-												 DD,
-												 isEvenTimestep);
-      getLastCudaError("LBCalcMacSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacThS7(  real* Conc,
-                              unsigned int* geoD,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              unsigned int numberOfThreads,
-                              real* DD7,
-                              bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      CalcConc7<<< grid, threads >>> (Conc,
-                                          geoD,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          DD7,
-                                          isEvenTimestep);
-      getLastCudaError("CalcConc7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void PlaneConcThS7(real* Conc,
-							  int* kPC,
-							  unsigned int numberOfPointskPC,
-							  unsigned int* geoD,
-							  unsigned int* neighborX,
-							  unsigned int* neighborY,
-							  unsigned int* neighborZ,
-							  unsigned int size_Mat,
-                              unsigned int numberOfThreads,
-							  real* DD7,
-							  bool isEvenTimestep)
-{
-   int Grid = (numberOfPointskPC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      GetPlaneConc7<<< grid, threads >>> (	Conc,
-												kPC,
-												numberOfPointskPC,
-												geoD,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												DD7,
-												isEvenTimestep);
-      getLastCudaError("GetPlaneConc7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void PlaneConcThS27(real* Conc,
-							   int* kPC,
-							   unsigned int numberOfPointskPC,
-							   unsigned int* geoD,
-							   unsigned int* neighborX,
-							   unsigned int* neighborY,
-							   unsigned int* neighborZ,
-							   unsigned int size_Mat,
-                               unsigned int numberOfThreads,
-							   real* DD27,
-							   bool isEvenTimestep)
-{
-   int Grid = (numberOfPointskPC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      GetPlaneConc27<<< grid, threads >>> (	Conc,
-												kPC,
-												numberOfPointskPC,
-												geoD,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												DD27,
-												isEvenTimestep);
-      getLastCudaError("GetPlaneConc27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcConcentration27( unsigned int numberOfThreads,
-                                     real* Conc,
-                                     unsigned int* geoD,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     real* DD27,
-                                     bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      CalcConc27<<< grid, threads >>> (  Conc,
-                                             geoD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             DD27,
-                                             isEvenTimestep);
-      getLastCudaError("CalcConc27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMedSP27(  real* vxD,
-                              real* vyD,
-                              real* vzD,
-                              real* rhoD,
-                              real* pressD,
-                              unsigned int* geoD,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              unsigned int numberOfThreads,
-                              real* DD,
-                              bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcMedSP27<<< grid, threads >>> (   vxD,
-                                             vyD,
-                                             vzD,
-                                             rhoD,
-                                             pressD,
-                                             geoD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             DD,
-                                             isEvenTimestep);
-      getLastCudaError("LBCalcMedSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMedCompSP27(  real* vxD,
-								  real* vyD,
-								  real* vzD,
-								  real* rhoD,
-								  real* pressD,
-								  unsigned int* geoD,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  unsigned int size_Mat,
-								  unsigned int numberOfThreads,
-								  real* DD,
-								  bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcMedCompSP27<<< grid, threads >>> (   vxD,
-												 vyD,
-												 vzD,
-												 rhoD,
-												 pressD,
-												 geoD,
-												 neighborX,
-												 neighborY,
-												 neighborZ,
-												 size_Mat,
-												 DD,
-												 isEvenTimestep);
-      getLastCudaError("LBCalcMedSP27 execution failed");
+    LBCalcMac27<<< grid, threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMac27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacSP27( // host-side wrapper: launches LBCalcMacSP27
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LBCalcMacSP27<<< grid.grid, grid.threads >>> ( // remaining parameters are passed through unchanged
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMacSP27 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacCompSP27( // host-side wrapper: launches LBCalcMacCompSP27
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LBCalcMacCompSP27<<< grid.grid, grid.threads >>> ( // remaining parameters are passed through unchanged
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMacCompSP27 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacThS7( // host-side wrapper: launches the CalcConc7 kernel (kernel name differs from the wrapper name)
+    real* Conc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // sizes the launch and is also forwarded to the kernel
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    real* DD7,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    CalcConc7<<< grid.grid, grid.threads >>> ( // remaining parameters are passed through unchanged
+        Conc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD7,
+        isEvenTimestep);
+    getLastCudaError("CalcConc7 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void PlaneConcThS7( // host-side wrapper: launches the GetPlaneConc7 kernel
+    real* Conc,
+    int* kPC,
+    unsigned int numberOfPointskPC, // sizes the launch and is also forwarded to the kernel
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded to the kernel only; the launch is not sized by it
+    unsigned int numberOfThreads, // passed to CudaGrid only; not forwarded to the kernel
+    real* DD7,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC); // launch is sized by the kPC point count, unlike the wrappers sized by numberOfLBnodes
+
+    GetPlaneConc7<<< grid.grid, grid.threads >>> ( // remaining parameters are passed through unchanged
+        Conc,
+        kPC,
+        numberOfPointskPC,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD7,
+        isEvenTimestep);
+    getLastCudaError("GetPlaneConc7 execution failed"); // error check directly after the launch, tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void PlaneConcThS27(
+    real* Conc,
+    int* kPC,
+    unsigned int numberOfPointskPC,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD27,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC);
+
+    GetPlaneConc27<<< grid.grid, grid.threads >>> (
+        Conc,
+        kPC,
+        numberOfPointskPC,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD27,
+        isEvenTimestep);
+    getLastCudaError("GetPlaneConc27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcConcentration27(
+    unsigned int numberOfThreads,
+    real* Conc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD27,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    CalcConc27<<< grid.grid, grid.threads >>> (
+        Conc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD27,
+        isEvenTimestep);
+    getLastCudaError("CalcConc27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalcMedSP27<<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMedSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMedCompSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalcMedCompSP27<<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMedCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMedCompAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int* geoD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	unsigned int numberOfThreads,
-	real* DD,
-	real* DD_AD,
-	bool isEvenTimestep)
-{
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LBCalcMedCompAD27 <<< grid, threads >>> (
-		vxD,
-		vyD,
-		vzD,
-		rhoD,
-		pressD,
-		concD,
-		geoD,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		DD,
-		DD_AD,
-		isEvenTimestep);
-	getLastCudaError("LBCalcMedAD27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacMedSP27(  real* vxD,
-                                 real* vyD,
-                                 real* vzD,
-                                 real* rhoD,
-                                 real* pressD,
-                                 unsigned int* geoD,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int tdiff,
-                                 unsigned int size_Mat,
-                                 unsigned int numberOfThreads,
-                                 bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcMacMedSP27<<< grid, threads >>> (   vxD,
-                                                vyD,
-                                                vzD,
-                                                rhoD,
-                                                pressD,
-                                                geoD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                tdiff,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("LBCalcMacMedSP27 execution failed");
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD_AD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalcMedCompAD27 <<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        concD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        DD_AD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMedCompAD27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int tdiff,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalcMacMedSP27<<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        tdiff,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMacMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ResetMedianValuesSP27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	unsigned int size_Mat,
-	unsigned int numberOfThreads,
-	bool isEvenTimestep)
-{
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LBResetMedianValuesSP27 << < grid, threads >> > (
-		vxD,
-		vyD,
-		vzD,
-		rhoD,
-		pressD,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("LBResetMedianValuesSP27 execution failed");
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBResetMedianValuesSP27 <<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBResetMedianValuesSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ResetMedianValuesAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int size_Mat,
-	unsigned int numberOfThreads,
-	bool isEvenTimestep)
-{
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LBResetMedianValuesAD27 << < grid, threads >> > (
-		vxD,
-		vyD,
-		vzD,
-		rhoD,
-		pressD,
-		concD,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("LBResetMedianValuesAD27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
-										 real* kyzFromfcNEQ,
-										 real* kxzFromfcNEQ,
-										 real* kxxMyyFromfcNEQ,
-										 real* kxxMzzFromfcNEQ,
-										 unsigned int* geoD,
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
-										 unsigned int numberOfThreads,
-										 real* DD,
-										 bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalc2ndMomentsIncompSP27<<< grid, threads >>> (  kxyFromfcNEQ,
-														 kyzFromfcNEQ,
-														 kxzFromfcNEQ,
-														 kxxMyyFromfcNEQ,
-														 kxxMzzFromfcNEQ,
-														 geoD,
-														 neighborX,
-														 neighborY,
-														 neighborZ,
-														 size_Mat,
-														 DD,
-														 isEvenTimestep);
-      getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
-										real* kyzFromfcNEQ,
-										real* kxzFromfcNEQ,
-										real* kxxMyyFromfcNEQ,
-										real* kxxMzzFromfcNEQ,
-										unsigned int* geoD,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										unsigned int size_Mat,
-										unsigned int numberOfThreads,
-										real* DD,
-										bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalc2ndMomentsCompSP27<<< grid, threads >>> (kxyFromfcNEQ,
-													 kyzFromfcNEQ,
-													 kxzFromfcNEQ,
-													 kxxMyyFromfcNEQ,
-													 kxxMzzFromfcNEQ,
-													 geoD,
-													 neighborX,
-													 neighborY,
-													 neighborZ,
-													 size_Mat,
-													 DD,
-													 isEvenTimestep);
-      getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void Calc3rdMomentsIncompSP27(real* CUMbbb,
-										 real* CUMabc,
-										 real* CUMbac,
-										 real* CUMbca,
-										 real* CUMcba,
-										 real* CUMacb,
-										 real* CUMcab,
-										 unsigned int* geoD,
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
-										 unsigned int numberOfThreads,
-										 real* DD,
-										 bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalc3rdMomentsIncompSP27<<< grid, threads >>> (  CUMbbb,
-														 CUMabc,
-														 CUMbac,
-														 CUMbca,
-														 CUMcba,
-														 CUMacb,
-														 CUMcab,
-														 geoD,
-														 neighborX,
-														 neighborY,
-														 neighborZ,
-														 DD,
-														 size_Mat,
-														 isEvenTimestep);
-      getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void Calc3rdMomentsCompSP27( real* CUMbbb,
-										real* CUMabc,
-										real* CUMbac,
-										real* CUMbca,
-										real* CUMcba,
-										real* CUMacb,
-										real* CUMcab,
-										unsigned int* geoD,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										unsigned int size_Mat,
-										unsigned int numberOfThreads,
-										real* DD,
-										bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalc3rdMomentsCompSP27<<< grid, threads >>> (CUMbbb,
-													 CUMabc,
-													 CUMbac,
-													 CUMbca,
-													 CUMcba,
-													 CUMacb,
-													 CUMcab,
-													 geoD,
-													 neighborX,
-													 neighborY,
-													 neighborZ,
-													 DD,
-													 size_Mat,
-													 isEvenTimestep);
-      getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcHigherMomentsIncompSP27(real* CUMcbb,
-											real* CUMbcb,
-											real* CUMbbc,
-											real* CUMcca,
-											real* CUMcac,
-											real* CUMacc,
-											real* CUMbcc,
-											real* CUMcbc,
-											real* CUMccb,
-											real* CUMccc,
-											unsigned int* geoD,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											unsigned int numberOfThreads,
-											real* DD,
-											bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcHigherMomentsIncompSP27<<< grid, threads >>> (CUMcbb,
-														  CUMbcb,
-														  CUMbbc,
-														  CUMcca,
-														  CUMcac,
-														  CUMacc,
-														  CUMbcc,
-														  CUMcbc,
-														  CUMccb,
-														  CUMccc,
-														  geoD,
-														  neighborX,
-														  neighborY,
-														  neighborZ,
-														  DD,
-														  size_Mat,
-														  isEvenTimestep);
-      getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcHigherMomentsCompSP27(  real* CUMcbb,
-											real* CUMbcb,
-											real* CUMbbc,
-											real* CUMcca,
-											real* CUMcac,
-											real* CUMacc,
-											real* CUMbcc,
-											real* CUMcbc,
-											real* CUMccb,
-											real* CUMccc,
-											unsigned int* geoD,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											unsigned int numberOfThreads,
-											real* DD,
-											bool isEvenTimestep)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcHigherMomentsCompSP27<<< grid, threads >>> (  CUMcbb,
-														  CUMbcb,
-														  CUMbbc,
-														  CUMcca,
-														  CUMcac,
-														  CUMacc,
-														  CUMbcc,
-														  CUMcbc,
-														  CUMccb,
-														  CUMccc,
-														  geoD,
-														  neighborX,
-														  neighborY,
-														  neighborZ,
-														  DD,
-														  size_Mat,
-														  isEvenTimestep);
-      getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void LBCalcMeasurePoints27(real* vxMP,
-                                      real* vyMP,
-                                      real* vzMP,
-                                      real* rhoMP,
-                                      unsigned int* kMP,
-                                      unsigned int numberOfPointskMP,
-                                      unsigned int MPClockCycle,
-                                      unsigned int t,
-                                      unsigned int* geoD,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      real* DD,
-                                      unsigned int numberOfThreads,
-                                      bool isEvenTimestep)
-{
-   int Grid = (numberOfPointskMP / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBCalcMeasurePoints<<< grid, threads >>> (vxMP,
-                                                vyMP,
-                                                vzMP,
-                                                rhoMP,
-                                                kMP,
-                                                numberOfPointskMP,
-                                                MPClockCycle,
-                                                t,
-                                                geoD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                DD,
-                                                isEvenTimestep);
-      getLastCudaError("LBCalcMeasurePoints execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void BcPress27( int nx,
-                           int ny,
-                           int tz,
-                           unsigned int grid_nx,
-                           unsigned int grid_ny,
-                           unsigned int* bcMatD,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           real* DD,
-                           unsigned int size_Mat,
-                           bool isEvenTimestep)
-{
-   dim3 threads       ( grid_nx, 1, 1 );
-   dim3 grid          ( grid_ny, 1 );
-
-      LB_BC_Press_East27<<< grid, threads >>> ( nx,
-                                                ny,
-                                                tz,
-                                                bcMatD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                DD,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("LB_BC_Press_East27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void BcVel27(int nx,
-                        int ny,
-                        int nz,
-                        int itz,
-                        unsigned int grid_nx,
-                        unsigned int grid_ny,
-                        unsigned int* bcMatD,
-                        unsigned int* neighborX,
-                        unsigned int* neighborY,
-                        unsigned int* neighborZ,
-                        real* DD,
-                        unsigned int size_Mat,
-                        bool isEvenTimestep,
-                        real u0x,
-                        real om)
-{
-   dim3 threads       ( grid_nx, 1, 1 );
-   dim3 grid          ( grid_ny, 1 );
-
-      LB_BC_Vel_West_27<<< grid, threads >>> (  nx,
-                                                ny,
-                                                nz,
-                                                itz,
-                                                bcMatD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                DD,
-                                                size_Mat,
-                                                isEvenTimestep,
-                                                u0x,
-                                                grid_nx,
-                                                grid_ny,
-                                                om);
-      getLastCudaError("LB_BC_Vel_West_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADPressDev7( unsigned int numberOfThreads,
-                              real* DD,
-                              real* DD7,
-                              real* temp,
-                              real* velo,
-                              real diffusivity,
-                              int* k_Q,
-                              real* QQ,
-                              unsigned int numberOfBCnodes,
-                              real om1,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADPress7<<< gridQ, threads >>>( DD,
-                                       DD7,
-                                       temp,
-                                       velo,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-      getLastCudaError("QADPress7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADPressDev27(unsigned int numberOfThreads,
-                              real* DD,
-                              real* DD27,
-                              real* temp,
-                              real* velo,
-                              real diffusivity,
-                              int* k_Q,
-                              real* QQ,
-                              unsigned int numberOfBCnodes,
-                              real om1,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADPress27<<< gridQ, threads >>>(   DD,
-                                          DD27,
-                                          temp,
-                                          velo,
-                                          diffusivity,
-                                          k_Q,
-                                          QQ,
-                                          numberOfBCnodes,
-                                          om1,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          isEvenTimestep);
-      getLastCudaError("QADPress27 execution failed");
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBResetMedianValuesAD27 <<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        concD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBResetMedianValuesAD27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc2ndMomentsIncompSP27(
+    real* kxyFromfcNEQ,
+    real* kyzFromfcNEQ,
+    real* kxzFromfcNEQ,
+    real* kxxMyyFromfcNEQ,
+    real* kxxMzzFromfcNEQ,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalc2ndMomentsIncompSP27<<< grid.grid, grid.threads >>> (
+        kxyFromfcNEQ,
+        kyzFromfcNEQ,
+        kxzFromfcNEQ,
+        kxxMyyFromfcNEQ,
+        kxxMzzFromfcNEQ,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc2ndMomentsCompSP27(
+    real* kxyFromfcNEQ,
+    real* kyzFromfcNEQ,
+    real* kxzFromfcNEQ,
+    real* kxxMyyFromfcNEQ,
+    real* kxxMzzFromfcNEQ,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalc2ndMomentsCompSP27<<< grid.grid, grid.threads >>> (
+        kxyFromfcNEQ,
+        kyzFromfcNEQ,
+        kxzFromfcNEQ,
+        kxxMyyFromfcNEQ,
+        kxxMzzFromfcNEQ,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc3rdMomentsIncompSP27(
+    real* CUMbbb,
+    real* CUMabc,
+    real* CUMbac,
+    real* CUMbca,
+    real* CUMcba,
+    real* CUMacb,
+    real* CUMcab,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalc3rdMomentsIncompSP27<<< grid.grid, grid.threads >>> (
+        CUMbbb,
+        CUMabc,
+        CUMbac,
+        CUMbca,
+        CUMcba,
+        CUMacb,
+        CUMcab,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc3rdMomentsCompSP27(
+    real* CUMbbb,
+    real* CUMabc,
+    real* CUMbac,
+    real* CUMbca,
+    real* CUMcba,
+    real* CUMacb,
+    real* CUMcab,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LBCalc3rdMomentsCompSP27<<< grid.grid, grid.threads >>> (
+        CUMbbb,
+        CUMabc,
+        CUMbac,
+        CUMbca,
+        CUMcba,
+        CUMacb,
+        CUMcab,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcHigherMomentsIncompSP27( // Host-side wrapper: configures and launches the LBCalcHigherMomentsIncompSP27 kernel.
+    real* CUMcbb,
+    real* CUMbcb,
+    real* CUMbbc,
+    real* CUMcca,
+    real* CUMcac,
+    real* CUMacc,
+    real* CUMbcc,
+    real* CUMcbc,
+    real* CUMccb,
+    real* CUMccc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // sizes the launch and is forwarded to the kernel
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // launch dimensions derived from thread count and node count
+
+    LBCalcHigherMomentsIncompSP27<<< grid.grid, grid.threads >>> (
+        CUMcbb,
+        CUMbcb,
+        CUMbbc,
+        CUMcca,
+        CUMcac,
+        CUMacc,
+        CUMbcc,
+        CUMcbc,
+        CUMccb,
+        CUMccc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD, // passed before numberOfLBnodes here, unlike this wrapper's own parameter order
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcHigherMomentsCompSP27( // Host-side wrapper: configures and launches the LBCalcHigherMomentsCompSP27 kernel.
+    real* CUMcbb,
+    real* CUMbcb,
+    real* CUMbbc,
+    real* CUMcca,
+    real* CUMcac,
+    real* CUMacc,
+    real* CUMbcc,
+    real* CUMcbc,
+    real* CUMccb,
+    real* CUMccc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // sizes the launch and is forwarded to the kernel
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // launch dimensions derived from thread count and node count
+
+    LBCalcHigherMomentsCompSP27<<< grid.grid, grid.threads >>> (
+        CUMcbb,
+        CUMbcb,
+        CUMbbc,
+        CUMcca,
+        CUMcac,
+        CUMacc,
+        CUMbcc,
+        CUMcbc,
+        CUMccb,
+        CUMccc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD, // passed before numberOfLBnodes here, unlike this wrapper's own parameter order
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void LBCalcMeasurePoints27( // Host-side wrapper: configures and launches the LBCalcMeasurePoints kernel.
+    real* vxMP,
+    real* vyMP,
+    real* vzMP,
+    real* rhoMP,
+    unsigned int* kMP,
+    unsigned int numberOfPointskMP, // sizes the launch (not numberOfLBnodes) and is forwarded to the kernel
+    unsigned int MPClockCycle,
+    unsigned int t,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    real* DD,
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskMP); // launch dimensions derived from thread count and measure-point count
+
+    LBCalcMeasurePoints<<< grid.grid, grid.threads >>> (
+        vxMP,
+        vyMP,
+        vzMP,
+        rhoMP,
+        kMP,
+        numberOfPointskMP,
+        MPClockCycle,
+        t,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMeasurePoints execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void BcPress27( // Host-side wrapper: launches the LB_BC_Press_East27 kernel with a caller-supplied launch shape.
+    int nx,
+    int ny,
+    int tz,
+    unsigned int grid_nx, // used only as the block size below; not forwarded to the kernel
+    unsigned int grid_ny, // used only as the block count below; not forwarded to the kernel
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    dim3 threads       ( grid_nx, 1, 1 ); // block dimensions: grid_nx threads per block. NOTE(review): no check against the device's threads-per-block limit - confirm callers respect it
+    dim3 grid          ( grid_ny, 1 ); // grid dimensions: grid_ny blocks
+
+    LB_BC_Press_East27<<< grid, threads >>> (
+        nx,
+        ny,
+        tz,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LB_BC_Press_East27 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void BcVel27( // Host-side wrapper: launches the LB_BC_Vel_West_27 kernel with a caller-supplied launch shape.
+    int nx,
+    int ny,
+    int nz,
+    int itz,
+    unsigned int grid_nx, // block size below; also forwarded to the kernel
+    unsigned int grid_ny, // block count below; also forwarded to the kernel
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    real u0x,
+    real om)
+{
+    dim3 threads       ( grid_nx, 1, 1 ); // block dimensions: grid_nx threads per block. NOTE(review): no check against the device's threads-per-block limit - confirm callers respect it
+    dim3 grid          ( grid_ny, 1 ); // grid dimensions: grid_ny blocks
+
+    LB_BC_Vel_West_27<<< grid, threads >>> (
+        nx,
+        ny,
+        nz,
+        itz,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep,
+        u0x,
+        grid_nx, // kernel argument order differs from this wrapper's parameter order from here on
+        grid_ny,
+        om);
+    getLastCudaError("LB_BC_Vel_West_27 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADPressDev7( // Host-side wrapper: configures and launches the QADPress7 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD7,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QADPress7<<< grid.grid, grid.threads >>>(
+        DD,
+        DD7,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPress7 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADPressDev27( // Host-side wrapper: configures and launches the QADPress27 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD27,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QADPress27<<< grid.grid, grid.threads >>>(
+        DD,
+        DD27,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPress27 execution failed"); // error check after the launch; the message names the kernel
 }
 //////////////////////////////////////////////////////////////////////////
 void QADPressNEQNeighborDev27(
-											unsigned int numberOfThreads,
-											real* DD,
-											real* DD27,
-											int* k_Q,
-											int* k_N,
-											int numberOfBCnodes,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep
-										)
-{
-
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   QADPressNEQNeighbor27<<< gridQ, threads >>>(
-												DD,
-												DD27,
-												k_Q,
-												k_N,
-												numberOfBCnodes,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep
-											  );
-   getLastCudaError("QADPressNEQNeighbor27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADVelDev7(unsigned int numberOfThreads,
-                           real* DD,
-                           real* DD7,
-                           real* temp,
-                           real* velo,
-                           real diffusivity,
-                           int* k_Q,
-                           real* QQ,
-                           unsigned int numberOfBCnodes,
-                           real om1,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int size_Mat,
-                           bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADVel7<<< gridQ, threads >>> (  
-                                       DD,
-                                       DD7,
-                                       temp,
-                                       velo,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-      getLastCudaError("QADVel7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADVelDev27(  unsigned int numberOfThreads,
-                              real* DD,
-                              real* DD27,
-                              real* temp,
-                              real* velo,
-                              real diffusivity,
-                              int* k_Q,
-                              real* QQ,
-                              unsigned int numberOfBCnodes,
-                              real om1,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADVel27<<< gridQ, threads >>> ( DD,
-                                      DD27,
-                                      temp,
-                                      velo,
-                                      diffusivity,
-                                      k_Q,
-                                      QQ,
-                                      numberOfBCnodes,
-                                      om1,
-                                      neighborX,
-                                      neighborY,
-                                      neighborZ,
-                                      size_Mat,
-                                      isEvenTimestep);
-      getLastCudaError("QADVel27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADDev7(unsigned int numberOfThreads,
-                        real* DD,
-                        real* DD7,
-                        real* temp,
-                        real diffusivity,
-                        int* k_Q,
-                        real* QQ,
-                        unsigned int numberOfBCnodes,
-                        real om1,
-                        unsigned int* neighborX,
-                        unsigned int* neighborY,
-                        unsigned int* neighborZ,
-                        unsigned int size_Mat,
-                        bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QAD7<<< gridQ, threads >>> (     DD,
-                                       DD7,
-                                       temp,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-      getLastCudaError("QAD7 execution failed");
+    unsigned int numberOfThreads, // QADPressNEQNeighborDev27: host-side wrapper that configures and launches the QADPressNEQNeighbor27 kernel
+    real* DD,
+    real* DD27,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes, // sizes the launch and is forwarded to the kernel. NOTE(review): signed int here, while sibling wrappers declare it unsigned int
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QADPressNEQNeighbor27<<< grid.grid, grid.threads >>>(
+        DD,
+        DD27,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPressNEQNeighbor27 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADVelDev7( // Host-side wrapper: configures and launches the QADVel7 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD7,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QADVel7<<< grid.grid, grid.threads >>> (
+        DD,
+        DD7,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADVel7 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADVelDev27( // Host-side wrapper: configures and launches the QADVel27 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD27,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QADVel27<<< grid.grid, grid.threads >>> (
+        DD,
+        DD27,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADVel27 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADDev7( // Host-side wrapper: configures and launches the QAD7 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD7,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QAD7<<< grid.grid, grid.threads >>> (
+        DD,
+        DD7,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QAD7 execution failed"); // error check after the launch; the message names the kernel
 }
 
 
@@ -2202,1700 +1670,1430 @@ void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
    uint* neighborZ,
    real* distributions,
    real* distributionsAD,
-   int size_Mat,
+   unsigned long long numberOfLBnodes,
    real* forces,
    bool isEvenTimestep)
 {
-   int Grid = (size_Mat / numberOfThreads) + 1;
-   dim3 grid(Grid, 1, 1);
-   dim3 threads(numberOfThreads, 1, 1);
+    int Grid = (numberOfLBnodes / numberOfThreads) + 1;
+    dim3 grid(Grid, 1, 1);
+    dim3 threads(numberOfThreads, 1, 1);
 
-   Factorized_Central_Moments_Advection_Diffusion_Device_Kernel <<< grid, threads >>> (
-      omegaDiffusivity,
-      typeOfGridNode,
-      neighborX,
-      neighborY,
-      neighborZ,
-      distributions,
-      distributionsAD,
-      size_Mat,
-      forces,
-      isEvenTimestep);
-   getLastCudaError("Factorized_Central_Moments_Advection_Diffusion_Device_Kernel execution failed");
+    Factorized_Central_Moments_Advection_Diffusion_Device_Kernel <<< grid, threads >>> (
+        omegaDiffusivity,
+        typeOfGridNode,
+        neighborX,
+        neighborY,
+        neighborZ,
+        distributions,
+        distributionsAD,
+        numberOfLBnodes,
+        forces,
+        isEvenTimestep);
+    getLastCudaError("Factorized_Central_Moments_Advection_Diffusion_Device_Kernel execution failed");
 }
 
 //////////////////////////////////////////////////////////////////////////
 void ADSlipVelDevComp(
-	uint numberOfThreads,
-	real * normalX,
-	real * normalY,
-	real * normalZ,
-	real * distributions,
-	real * distributionsAD,
-	int* QindexArray,
-	real * Qarrays,
-	uint numberOfBCnodes,
-	real omegaDiffusivity,
-	uint * neighborX,
-	uint * neighborY,
-	uint * neighborZ,
-	uint size_Mat,
-	bool isEvenTimestep)
-{
-	int Grid = (numberOfBCnodes / numberOfThreads) + 1;
-	dim3 gridQ(Grid, 1, 1);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	AD_SlipVelDeviceComp << < gridQ, threads >> > (
-		normalX,
-		normalY,
-		normalZ,
-		distributions,
-		distributionsAD,
-		QindexArray,
-		Qarrays,
-		numberOfBCnodes,
-		omegaDiffusivity,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("AD_SlipVelDeviceComp execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-
-void QADDirichletDev27( unsigned int numberOfThreads,
-								   real* DD,
-								   real* DD27,
-								   real* temp,
-								   real diffusivity,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADDirichlet27<<< gridQ, threads >>> (
-											   DD,
-											   DD27,
-											   temp,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
-      getLastCudaError("QADDirichletDev27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADBBDev27(unsigned int numberOfThreads,
-                           real* DD,
-                           real* DD27,
-                           real* temp,
-                           real diffusivity,
-                           int* k_Q,
-                           real* QQ,
-                           unsigned int numberOfBCnodes,
-                           real om1,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int size_Mat,
-                           bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADBB27<<< gridQ, threads >>> (  DD,
-                                       DD27,
-                                       temp,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-      getLastCudaError("QADBB27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QNoSlipADincompDev7(unsigned int numberOfThreads,
-									real* DD,
-									real* DD7,
-									real* temp,
-									real diffusivity,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QNoSlipADincomp7<<< gridQ, threads >>> (
-											   DD,
-											   DD7,
-											   temp,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
-      getLastCudaError("QNoSlipADincomp7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QNoSlipADincompDev27(  unsigned int numberOfThreads,
-									   real* DD,
-									   real* DD27,
-									   real* temp,
-									   real diffusivity,
-									   int* k_Q,
-									   real* QQ,
-									   unsigned int numberOfBCnodes,
-									   real om1,
-									   unsigned int* neighborX,
-									   unsigned int* neighborY,
-									   unsigned int* neighborZ,
-									   unsigned int size_Mat,
-									   bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QNoSlipADincomp27<<< gridQ, threads >>> (
-											   DD,
-											   DD27,
-											   temp,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
-      getLastCudaError("QNoSlipADincomp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADVeloIncompDev7( unsigned int numberOfThreads,
-								   real* DD,
-								   real* DD7,
-								   real* temp,
-								   real* velo,
-								   real diffusivity,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADVeloIncomp7<<< gridQ, threads >>> ( 
-											   DD,
-											   DD7,
-											   temp,
-											   velo,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
-      getLastCudaError("QADVeloIncomp7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADVeloIncompDev27(   unsigned int numberOfThreads,
-									  real* DD,
-									  real* DD27,
-									  real* temp,
-									  real* velo,
-									  real diffusivity,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADVeloIncomp27<<< gridQ, threads >>> (
-											  DD,
-											  DD27,
-											  temp,
-											  velo,
-											  diffusivity,
-											  k_Q,
-											  QQ,
-											  numberOfBCnodes,
-											  om1,
-											  neighborX,
-											  neighborY,
-											  neighborZ,
-											  size_Mat,
-											  isEvenTimestep);
-      getLastCudaError("QADVeloIncomp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADPressIncompDev7( unsigned int numberOfThreads,
-									  real* DD,
-									  real* DD7,
-									  real* temp,
-									  real* velo,
-									  real diffusivity,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADPressIncomp7<<< gridQ, threads >>>(
-											   DD,
-											   DD7,
-											   temp,
-											   velo,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
-      getLastCudaError("QADPressIncomp7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADPressIncompDev27(  unsigned int numberOfThreads,
-									  real* DD,
-									  real* DD27,
-									  real* temp,
-									  real* velo,
-									  real diffusivity,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QADPressIncomp27<<< gridQ, threads >>>(
-											  DD,
-											  DD27,
-											  temp,
-											  velo,
-											  diffusivity,
-											  k_Q,
-											  QQ,
-											  numberOfBCnodes,
-											  om1,
-											  neighborX,
-											  neighborY,
-											  neighborZ,
-											  size_Mat,
-											  isEvenTimestep);
-      getLastCudaError("QADPressIncomp27 execution failed");
+    uint numberOfThreads, // ADSlipVelDevComp: host-side wrapper that configures and launches the AD_SlipVelDeviceComp kernel
+    real * normalX,
+    real * normalY,
+    real * normalZ,
+    real * distributions,
+    real * distributionsAD,
+    int* QindexArray,
+    real * Qarrays,
+    uint numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real omegaDiffusivity,
+    uint * neighborX,
+    uint * neighborY,
+    uint * neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    AD_SlipVelDeviceComp <<< grid.grid, grid.threads >>> (
+        normalX,
+        normalY,
+        normalZ,
+        distributions,
+        distributionsAD,
+        QindexArray,
+        Qarrays,
+        numberOfBCnodes,
+        omegaDiffusivity,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("AD_SlipVelDeviceComp execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+
+void QADDirichletDev27( // Host-side wrapper: configures and launches the QADDirichlet27 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD27,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QADDirichlet27<<< grid.grid, grid.threads >>> (
+        DD,
+        DD27,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADDirichletDev27 execution failed"); // error check after the launch. NOTE(review): the message names this wrapper, not the kernel QADDirichlet27 as sibling wrappers do
+}
+//////////////////////////////////////////////////////////////////////////
+void QADBBDev27( // Host-side wrapper: configures and launches the QADBB27 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD27,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QADBB27<<< grid.grid, grid.threads >>> (
+        DD,
+        DD27,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADBB27 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QNoSlipADincompDev7( // Host-side wrapper: configures and launches the QNoSlipADincomp7 kernel.
+    unsigned int numberOfThreads, // consumed only by CudaGrid; presumably threads per block - confirm in CudaGrid
+    real* DD,
+    real* DD7,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // only forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch dimensions derived from thread count and boundary-node count
+
+    QNoSlipADincomp7<<< grid.grid, grid.threads >>> (
+        DD,
+        DD7,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QNoSlipADincomp7 execution failed"); // error check after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QNoSlipADincomp27 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are the launch dimensions. All other parameters are passed to the kernel
+// unchanged. getLastCudaError is called directly after the launch.
+void QNoSlipADincompDev27(
+    unsigned int numberOfThreads,
+    real* DD, real* DD27, real* temp,
+    real diffusivity,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QNoSlipADincomp27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD, DD27, temp,
+        diffusivity,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QNoSlipADincomp27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QADVeloIncomp7 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are the launch dimensions. All other parameters are passed to the kernel
+// unchanged. getLastCudaError is called directly after the launch.
+void QADVeloIncompDev7(
+    unsigned int numberOfThreads,
+    real* DD, real* DD7, real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QADVeloIncomp7<<<launchConfig.grid, launchConfig.threads>>>(
+        DD, DD7, temp,
+        velo,
+        diffusivity,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QADVeloIncomp7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QADVeloIncomp27 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are the launch dimensions. All other parameters are passed to the kernel
+// unchanged. getLastCudaError is called directly after the launch.
+void QADVeloIncompDev27(
+    unsigned int numberOfThreads,
+    real* DD, real* DD27, real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QADVeloIncomp27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD, DD27, temp,
+        velo,
+        diffusivity,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QADVeloIncomp27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QADPressIncomp7 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are the launch dimensions. All other parameters are passed to the kernel
+// unchanged. getLastCudaError is called directly after the launch.
+void QADPressIncompDev7(
+    unsigned int numberOfThreads,
+    real* DD, real* DD7, real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QADPressIncomp7<<<launchConfig.grid, launchConfig.threads>>>(
+        DD, DD7, temp,
+        velo,
+        diffusivity,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QADPressIncomp7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QADPressIncomp27 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are the launch dimensions. All other parameters are passed to the kernel
+// unchanged. getLastCudaError is called directly after the launch.
+void QADPressIncompDev27(
+    unsigned int numberOfThreads,
+    real* DD, real* DD27, real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QADPressIncomp27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD, DD27, temp,
+        velo,
+        diffusivity,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QADPressIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
-
-      QDevice27<<< grid, threads >>> (
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid dimensions computed from the thread count and the boundary node count
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // block dimensions: numberofthreads in x, 1 in y and z
 
+    QDevice27<<< grid, threads >>> ( // launch QDevice27; every argument is read from one of the two structs
+        parameterDevice->distributions.f[0], // first entry of the distributions.f array
+        boundaryCondition->k,
+        boundaryCondition->q27[0], // first entry of the q27 array
+        boundaryCondition->numberOfBCnodes, // same count that sized the grid above
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
       getLastCudaError("QDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
-
-      QDeviceComp27<<< grid, threads >>> (
-           parameterDevice->distributions.f[0],
-           boundaryCondition->k,
-           boundaryCondition->q27[0],
-           boundaryCondition->numberOfBCnodes,
-           parameterDevice->omega,
-           parameterDevice->neighborX,
-           parameterDevice->neighborY,
-           parameterDevice->neighborZ,
-           parameterDevice->numberOfNodes,
-           parameterDevice->isEvenTimestep);
-      getLastCudaError("QDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QDevCompThinWalls27(unsigned int numberOfThreads,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* geom,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int* neighborWSB,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   QDeviceCompThinWallsPartOne27 <<< gridQ, threads >>> (DD,
-														 k_Q,
-														 QQ,
-														 numberOfBCnodes,
-														 om1,
-														 neighborX,
-														 neighborY,
-														 neighborZ,
-														 size_Mat,
-														 isEvenTimestep);
-   getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed");
-
-   QThinWallsPartTwo27 <<< gridQ, threads >>> ( DD,
-												k_Q,
-												QQ,
-												numberOfBCnodes,
-												geom,
-												neighborX,
-												neighborY,
-												neighborZ,
-												neighborWSB,
-												size_Mat,
-												isEvenTimestep);
-   getLastCudaError("QThinWallsPartTwo27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
+    QDeviceComp27<<< grid, threads >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QDeviceComp27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper that runs QDeviceCompThinWallsPartOne27 followed by
+// QThinWallsPartTwo27 with one shared launch configuration.
+//
+// numberOfThreads and numberOfBCnodes are handed to vf::cuda::CudaGrid, whose
+// grid/threads members are the launch dimensions of both kernels. om1 is passed
+// only to part one; geom and neighborWSB are passed only to part two.
+void QDevCompThinWalls27(
+    unsigned int numberOfThreads,
+    real* DD,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* geom,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // first kernel: receives om1, but neither geom nor neighborWSB
+    QDeviceCompThinWallsPartOne27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed");
+
+    // second kernel: receives geom and neighborWSB, but not om1
+    QThinWallsPartTwo27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        k_Q, QQ,
+        numberOfBCnodes,
+        geom,
+        neighborX, neighborY, neighborZ,
+        neighborWSB,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QThinWallsPartTwo27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1);
-
-   QDevice3rdMomentsComp27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid dimensions computed from the thread count and the boundary node count
+    dim3 threads(parameterDevice->numberofthreads, 1, 1); // block dimensions: numberofthreads in x, 1 in y and z
+
+    QDevice3rdMomentsComp27<<< grid, threads >>> ( // launch QDevice3rdMomentsComp27; every argument is read from one of the two structs
+        parameterDevice->distributions.f[0], // first entry of the distributions.f array
+        boundaryCondition->k,
+        boundaryCondition->q27[0], // first entry of the q27 array
+        boundaryCondition->numberOfBCnodes, // same count that sized the grid above
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
    getLastCudaError("QDevice3rdMomentsComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void QDevIncompHighNu27( unsigned int numberOfThreads,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QDeviceIncompHighNu27<<< gridQ, threads >>> (
-												   DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
-      getLastCudaError("QDeviceIncompHighNu27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QDevCompHighNu27(   unsigned int numberOfThreads,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QDeviceCompHighNu27<<< gridQ, threads >>> (
-												   DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
-      getLastCudaError("QDevice27 execution failed");
+// Launch wrapper for the QDeviceIncompHighNu27 kernel. numberOfThreads is used only
+// for the launch configuration: together with numberOfBCnodes it is handed to
+// vf::cuda::CudaGrid, whose grid/threads members are the launch dimensions. All
+// other parameters are passed to the kernel unchanged.
+void QDevIncompHighNu27(
+    unsigned int numberOfThreads,
+    real* DD, int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QDeviceIncompHighNu27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD, k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QDeviceIncompHighNu27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QDeviceCompHighNu27 kernel. numberOfThreads is used only
+// for the launch configuration: together with numberOfBCnodes it is handed to
+// vf::cuda::CudaGrid, whose grid/threads members are the launch dimensions. All
+// other parameters are passed to the kernel unchanged.
+void QDevCompHighNu27(
+    unsigned int numberOfThreads,
+    real* DD, int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QDeviceCompHighNu27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD, k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QDeviceCompHighNu27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QVelDevPlainBB27<<< grid, threads >>> (
-         boundaryCondition->Vx,
-         boundaryCondition->Vy,
-         boundaryCondition->Vz,
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QVelDevicePlainBB27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDeviceCouette27(unsigned int numberOfThreads,
-									real* vx,
-									real* vy,
-									real* vz,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QVelDevCouette27<<< gridQ, threads >>> ( vx,
-												vy,
-												vz,
-												DD,
-												k_Q,
-												QQ,
-												numberOfBCnodes,
-												om1,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep);
-      getLastCudaError("QVelDevicePlainBB27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevice1h27(   unsigned int numberOfThreads,
-								  int nx,
-								  int ny,
-								  real* vx,
-								  real* vy,
-								  real* vz,
-								  real* DD,
-								  int* k_Q,
-								  real* QQ,
-								  unsigned int numberOfBCnodes,
-								  real om1,
-								  real Phi,
-								  real angularVelocity,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  real* coordX,
-								  real* coordY,
-								  real* coordZ,
-								  unsigned int size_Mat,
-								  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QVelDev1h27<<< gridQ, threads >>> (nx,
-                                          ny,
-                                          vx,
-                                          vy,
-                                          vz,
-                                          DD,
-                                          k_Q,
-                                          QQ,
-                                          numberOfBCnodes,
-                                          om1,
-										  Phi,
-										  angularVelocity,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-										  coordX,
-										  coordY,
-										  coordZ,
-                                          size_Mat,
-                                          isEvenTimestep);
-      getLastCudaError("QVelDevice27 execution failed");
+    QVelDevPlainBB27<<< grid, threads >>> (
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QVelDevicePlainBB27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QVelDevCouette27 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are the launch dimensions. All other parameters are passed to the kernel
+// unchanged. getLastCudaError is called directly after the launch.
+void QVelDeviceCouette27(
+    unsigned int numberOfThreads,
+    real* vx, real* vy, real* vz,
+    real* DD,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QVelDevCouette27<<<launchConfig.grid, launchConfig.threads>>>(
+        vx, vy, vz,
+        DD,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QVelDevCouette27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QVelDev1h27 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are used as the launch dimensions.
+// All other parameters are passed to the kernel unchanged and in declaration
+// order.
+// getLastCudaError is called directly after the launch.
+void QVelDevice1h27(
+    unsigned int numberOfThreads,
+    int nx,
+    int ny,
+    real* vx, real* vy, real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    real Phi,
+    real angularVelocity,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    real* coordX, real* coordY, real* coordZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QVelDev1h27<<<launchConfig.grid, launchConfig.threads>>>(
+        nx,
+        ny,
+        vx, vy, vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        Phi,
+        angularVelocity,
+        neighborX, neighborY, neighborZ,
+        coordX, coordY, coordZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QVelDev1h27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-      QVelDevice27<<< grid, threads >>> (
-            parameterDevice->nx,
-            parameterDevice->ny,
-            boundaryCondition->Vx,
-            boundaryCondition->Vy,
-            boundaryCondition->Vz,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
-      getLastCudaError("QVelDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
-									  real* vx,
-									  real* vy,
-									  real* vz,
-									  real* DD,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QVelDeviceCompPlusSlip27<<< gridQ, threads >>> (
-													  vx,
-													  vy,
-													  vz,
-													  DD,
-													  k_Q,
-													  QQ,
-													  numberOfBCnodes,
-													  om1,
-													  neighborX,
-													  neighborY,
-													  neighborZ,
-													  size_Mat,
-													  isEvenTimestep);
-      getLastCudaError("QVelDeviceCompPlusSlip27 execution failed");
+    QVelDevice27<<< grid, threads >>> (
+        parameterDevice->nx,
+        parameterDevice->ny,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QVelDevice27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Launch wrapper for the QVelDeviceCompPlusSlip27 kernel.
+//
+// numberOfThreads is used only for the launch configuration: together with
+// numberOfBCnodes it is handed to vf::cuda::CudaGrid, whose grid/threads members
+// are the launch dimensions. All other parameters are passed to the kernel
+// unchanged. getLastCudaError is called directly after the launch.
+void QVelDevCompPlusSlip27(
+    unsigned int numberOfThreads,
+    real* vx, real* vy, real* vz,
+    real* DD,
+    int* k_Q, real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // kernel arguments appear in the same order as the wrapper's parameters
+    QVelDeviceCompPlusSlip27<<<launchConfig.grid, launchConfig.threads>>>(
+        vx, vy, vz,
+        DD,
+        k_Q, QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+
+    // the message passed to getLastCudaError names the kernel that was launched
+    getLastCudaError("QVelDeviceCompPlusSlip27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid dimensions computed from the thread count and the boundary node count
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // block dimensions: numberofthreads in x, 1 in y and z
 
-   QVelDeviceComp27<<< grid, threads >>> (
-            boundaryCondition->Vx,
-            boundaryCondition->Vy,
-            boundaryCondition->Vz,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,        
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
+    QVelDeviceComp27<<< grid, threads >>> ( // launch QVelDeviceComp27; every argument is read from one of the two structs
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0], // first entry of the distributions.f array
+        boundaryCondition->k,
+        boundaryCondition->q27[0], // first entry of the q27 array
+        boundaryCondition->numberOfBCnodes, // same count that sized the grid above
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
    getLastCudaError("QVelDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void QVelDevCompThinWalls27(unsigned int numberOfThreads,
-							           real* vx,
-							           real* vy,
-							           real* vz,
-							           real* DD,
-							           int* k_Q,
-							           real* QQ,
-							           unsigned int numberOfBCnodes,
-							           real om1,
-									     unsigned int* geom,
-							           unsigned int* neighborX,
-							           unsigned int* neighborY,
-							           unsigned int* neighborZ,
-									     unsigned int* neighborWSB,
-							           unsigned int size_Mat,
-							           bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   QVelDeviceCompThinWallsPartOne27<<< gridQ, threads >>> (vx,
-											                  vy,
-											                  vz,
-											                  DD,
-											                  k_Q,
-											                  QQ,
-											                  numberOfBCnodes,
-											                  om1,
-											                  neighborX,
-											                  neighborY,
-											                  neighborZ,
-											                  size_Mat,
-											                  isEvenTimestep);
-   getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed");
-
-	QThinWallsPartTwo27 <<< gridQ, threads >>> (
-       DD,
-       k_Q,
-       QQ,
-       numberOfBCnodes,
-       geom,
-       neighborX,
-       neighborY,
-       neighborZ,
-       neighborWSB,
-       size_Mat,
-       isEvenTimestep);
-   getLastCudaError("QThinWallsPartTwo27 execution failed");
-}
-
-void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+// Launch wrapper that runs QVelDeviceCompThinWallsPartOne27 followed by
+// QThinWallsPartTwo27 with one shared launch configuration.
+//
+// numberOfThreads and numberOfBCnodes are handed to vf::cuda::CudaGrid, whose
+// grid/threads members are the launch dimensions of both kernels. vx, vy, vz and
+// om1 are passed only to part one; geom and neighborWSB are passed only to part
+// two.
+void QVelDevCompThinWalls27(
+    unsigned int numberOfThreads,
+    real* vx, real* vy, real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* geom,
+    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // launch dimensions built from the thread count and the boundary node count
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+
+    // first kernel: receives the velocities and om1, but neither geom nor neighborWSB
+    QVelDeviceCompThinWallsPartOne27<<<launchConfig.grid, launchConfig.threads>>>(
+        vx, vy, vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX, neighborY, neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed");
+
+    // second kernel: receives geom and neighborWSB, but neither the velocities nor om1
+    QThinWallsPartTwo27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        geom,
+        neighborX, neighborY, neighborZ,
+        neighborWSB,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QThinWallsPartTwo27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QVelDevCompZeroPress27(LBMSimulationParameter *parameterDevice, QforBoundaryConditions *boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QVelDeviceCompZeroPress27<<< grid, threads >>> (
-            boundaryCondition->Vx,
-            boundaryCondition->Vy,
-            boundaryCondition->Vz,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
-   getLastCudaError("QVelDeviceCompZeroPress27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevIncompHighNu27(unsigned int numberOfThreads,
-									  real* vx,
-									  real* vy,
-									  real* vz,
-									  real* DD,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QVelDeviceIncompHighNu27<<< gridQ, threads >>> (
-													  vx,
-													  vy,
-													  vz,
-													  DD,
-													  k_Q,
-													  QQ,
-													  numberOfBCnodes,
-													  om1,
-													  neighborX,
-													  neighborY,
-													  neighborZ,
-													  size_Mat,
-													  isEvenTimestep);
-      getLastCudaError("QVelDeviceIncompHighNu27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevCompHighNu27(  unsigned int numberOfThreads,
-									  real* vx,
-									  real* vy,
-									  real* vz,
-									  real* DD,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QVelDeviceCompHighNu27<<< gridQ, threads >>> (
-													  vx,
-													  vy,
-													  vz,
-													  DD,
-													  k_Q,
-													  QQ,
-													  numberOfBCnodes,
-													  om1,
-													  neighborX,
-													  neighborY,
-													  neighborZ,
-													  size_Mat,
-													  isEvenTimestep);
-      getLastCudaError("QVelDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVeloDevEQ27(unsigned int numberOfThreads,
-							 real* VeloX,
-							 real* VeloY,
-							 real* VeloZ,
-							 real* DD,
-							 int* k_Q,
-							 int numberOfBCnodes,
-							 real om1,
-							 unsigned int* neighborX,
-							 unsigned int* neighborY,
-							 unsigned int* neighborZ,
-							 unsigned int size_Mat,
-							 bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QVeloDeviceEQ27<<< gridQ, threads >>> (VeloX,
-											 VeloY,
-											 VeloZ,
-											 DD,
-											 k_Q,
-											 numberOfBCnodes,
-											 om1,
-											 neighborX,
-											 neighborY,
-											 neighborZ,
-											 size_Mat,
-											 isEvenTimestep);
-      getLastCudaError("QVeloDeviceEQ27 execution failed");
+    QVelDeviceCompZeroPress27<<< grid, threads >>> (
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QVelDeviceCompZeroPress27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QVelDevIncompHighNu27( // host launcher for the QVelDeviceIncompHighNu27 kernel
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from thread count and number of BC nodes
+
+    QVelDeviceIncompHighNu27<<< grid.grid, grid.threads >>> ( // every argument after numberOfThreads is forwarded unchanged, in order
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDeviceIncompHighNu27 execution failed"); // launch-error check tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void QVelDevCompHighNu27( // host launcher for the QVelDeviceCompHighNu27 kernel
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from thread count and number of BC nodes
+
+    QVelDeviceCompHighNu27<<< grid.grid, grid.threads >>> ( // every argument after numberOfThreads is forwarded unchanged, in order
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDeviceCompHighNu27 execution failed"); // message must name the kernel launched above so failures are attributed correctly
+}
+//////////////////////////////////////////////////////////////////////////
+void QVeloDevEQ27( // host launcher for the QVeloDeviceEQ27 kernel
+    unsigned int numberOfThreads,
+    real* VeloX,
+    real* VeloY,
+    real* VeloZ,
+    real* DD,
+    int* k_Q,
+    int numberOfBCnodes, // NOTE(review): signed here, while sibling launchers declare this count as unsigned int
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from thread count and number of BC nodes
+
+    QVeloDeviceEQ27<<< grid.grid, grid.threads >>> ( // every argument after numberOfThreads is forwarded unchanged, in order
+        VeloX,
+        VeloY,
+        VeloZ,
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVeloDeviceEQ27 execution failed"); // launch-error check tagged with the kernel name
 }
 //////////////////////////////////////////////////////////////////////////
 void QVeloStreetDevEQ27(
-	uint  numberOfThreads,
-	real* veloXfraction,
-	real* veloYfraction,
-	int*  naschVelo,
-	real* DD,
-	int*  naschIndex,
-	int   numberOfStreetNodes,
-	real  velocityRatio,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint  size_Mat,
-	bool  isEvenTimestep)
-{
-	int Grid = (numberOfStreetNodes / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 gridQ(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	QVeloStreetDeviceEQ27 << < gridQ, threads >> > (
-		veloXfraction,
-		veloYfraction,
-		naschVelo,
-		DD,
-		naschIndex,
-		numberOfStreetNodes,
-		velocityRatio,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("QVeloStreetDeviceEQ27 execution failed");
+    uint  numberOfThreads,
+    real* veloXfraction,
+    real* veloYfraction,
+    int*  naschVelo,
+    real* DD,
+    int*  naschIndex,
+    int   numberOfStreetNodes,
+    real  velocityRatio,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint  numberOfLBnodes,
+    bool  isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfStreetNodes);
+
+    QVeloStreetDeviceEQ27 << < grid.grid, grid.threads >> > (
+        veloXfraction,
+        veloYfraction,
+        naschVelo,
+        DD,
+        naschIndex,
+        numberOfStreetNodes,
+        velocityRatio,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVeloStreetDeviceEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid obtained from thread count and number of BC nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QSlipDevice27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDevice27 execution failed");
+    QSlipDevice27<<< grid, threads >>> ( // kernel arguments are read from the two structs passed in
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipDevice27 execution failed"); // launch-error check tagged with the kernel name
 }
 //////////////////////////////////////////////////////////////////////////
 void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid obtained from thread count and number of BC nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QSlipDeviceComp27TurbViscosity<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->turbViscosity,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed");
+    QSlipDeviceComp27TurbViscosity<<< grid, threads >>> ( // same argument list as QSlipDevComp27's launch plus turbViscosity
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->turbViscosity,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed"); // launch-error check tagged with the kernel name
 }
 //////////////////////////////////////////////////////////////////////////
 void QSlipPressureDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid obtained from thread count and number of BC nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QSlipPressureDeviceComp27TurbViscosity<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->turbViscosity,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed");
+    QSlipPressureDeviceComp27TurbViscosity<<< grid, threads >>> ( // kernel arguments are read from the two structs passed in
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->turbViscosity,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipPressureDeviceComp27TurbViscosity execution failed"); // message must name the kernel launched above, not its non-pressure sibling
 }
 //////////////////////////////////////////////////////////////////////////
 void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid obtained from thread count and number of BC nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QSlipDeviceComp27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDeviceComp27 execution failed");
+    QSlipDeviceComp27<<< grid, threads >>> ( // kernel arguments are read from the two structs passed in
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipDeviceComp27 execution failed"); // launch-error check tagged with the kernel name
 }
 //////////////////////////////////////////////////////////////////////////
 void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QSlipDeviceComp27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("BBSlipDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QSlipGeomDevComp27(unsigned int numberOfThreads,
-								   real* DD,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   real* NormalX,
-								   real* NormalY,
-								   real* NormalZ,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QSlipGeomDeviceComp27<<< gridQ, threads >>> (DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   NormalX,
-												   NormalY,
-												   NormalZ,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
-      getLastCudaError("QSlipGeomDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QSlipNormDevComp27(unsigned int numberOfThreads,
-								   real* DD,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   real* NormalX,
-								   real* NormalY,
-								   real* NormalZ,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QSlipNormDeviceComp27<<< gridQ, threads >>> (DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   NormalX,
-												   NormalY,
-												   NormalZ,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
-      getLastCudaError("QSlipGeomDeviceComp27 execution failed");
+    BBSlipDeviceComp27<<< grid, threads >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("BBSlipDeviceComp27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QSlipGeomDevComp27( // host launcher for the QSlipGeomDeviceComp27 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid(numberOfThreads, numberOfBCnodes); // launch configuration built from thread count and number of BC nodes
+
+    QSlipGeomDeviceComp27<<< grid.grid, grid.threads >>> ( // every argument after numberOfThreads is forwarded unchanged, in order
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        NormalX,
+        NormalY,
+        NormalZ,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QSlipGeomDeviceComp27 execution failed"); // launch-error check tagged with the kernel name
+}
+//////////////////////////////////////////////////////////////////////////
+void QSlipNormDevComp27( // host launcher for the QSlipNormDeviceComp27 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from thread count and number of BC nodes
+
+    QSlipNormDeviceComp27<<< grid.grid, grid.threads >>> ( // every argument after numberOfThreads is forwarded unchanged, in order
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        NormalX,
+        NormalY,
+        NormalZ,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QSlipNormDeviceComp27 execution failed"); // launch-error check tagged with the kernel name
 }
 //////////////////////////////////////////////////////////////////////////
 void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
 {
-   dim3 grid = vf::cuda::getCudaGrid(  para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
-
-      QStressDeviceComp27<<< grid, threads >>> (
-         para->getParD(level)->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->kN,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         para->getParD(level)->omega,
-         para->getParD(level)->turbViscosity,
-         para->getParD(level)->velocityX,
-         para->getParD(level)->velocityY,
-         para->getParD(level)->velocityY,
-         boundaryCondition->normalX,
-         boundaryCondition->normalY,
-         boundaryCondition->normalZ,
-         boundaryCondition->Vx,
-         boundaryCondition->Vy,
-         boundaryCondition->Vz,
-         boundaryCondition->Vx1,
-         boundaryCondition->Vy1,
-         boundaryCondition->Vz1,
-         para->getParD(level)->wallModel.samplingOffset,
-         para->getParD(level)->wallModel.z0,
-         para->getHasWallModelMonitor(),
-         para->getParD(level)->wallModel.u_star,
-         para->getParD(level)->wallModel.Fx,
-         para->getParD(level)->wallModel.Fy,
-         para->getParD(level)->wallModel.Fz,
-         para->getParD(level)->neighborX,
-         para->getParD(level)->neighborY,
-         para->getParD(level)->neighborZ,
-         para->getParD(level)->numberOfNodes,
-         para->getParD(level)->isEvenTimestep);
-      getLastCudaError("QSlipDeviceComp27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid(  para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); // grid obtained from thread count and number of BC nodes
+    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    QStressDeviceComp27<<< grid, threads >>> ( // device data for the requested level comes from para->getParD(level)
+        para->getParD(level)->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        para->getParD(level)->omega,
+        para->getParD(level)->turbViscosity,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ, // third velocity component; velocityY must not be passed twice
+        boundaryCondition->normalX,
+        boundaryCondition->normalY,
+        boundaryCondition->normalZ,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        boundaryCondition->Vx1,
+        boundaryCondition->Vy1,
+        boundaryCondition->Vz1,
+        para->getParD(level)->wallModel.samplingOffset,
+        para->getParD(level)->wallModel.z0,
+        para->getHasWallModelMonitor(),
+        para->getParD(level)->wallModel.u_star,
+        para->getParD(level)->wallModel.Fx,
+        para->getParD(level)->wallModel.Fy,
+        para->getParD(level)->wallModel.Fz,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("QStressDeviceComp27 execution failed"); // launch-error check tagged with the kernel name
 }
 
 //////////////////////////////////////////////////////////////////////////
 void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
 {
-   dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
-
-   BBStressDevice27<<< grid, threads >>> (
-      para->getParD(level)->distributions.f[0],
-      boundaryCondition->k,
-      boundaryCondition->kN,
-      boundaryCondition->q27[0],
-      boundaryCondition->numberOfBCnodes,
-      para->getParD(level)->velocityX,
-      para->getParD(level)->velocityY,
-      para->getParD(level)->velocityY,
-      boundaryCondition->normalX,
-      boundaryCondition->normalY,
-      boundaryCondition->normalZ,
-      boundaryCondition->Vx,
-      boundaryCondition->Vy,
-      boundaryCondition->Vz,
-      boundaryCondition->Vx1,
-      boundaryCondition->Vy1,
-      boundaryCondition->Vz1,
-      para->getParD(level)->wallModel.samplingOffset,
-      para->getParD(level)->wallModel.z0,
-      para->getHasWallModelMonitor(),
-      para->getParD(level)->wallModel.u_star,
-      para->getParD(level)->wallModel.Fx,
-      para->getParD(level)->wallModel.Fy,
-      para->getParD(level)->wallModel.Fz,
-      para->getParD(level)->neighborX,
-      para->getParD(level)->neighborY,
-      para->getParD(level)->neighborZ,
-      para->getParD(level)->numberOfNodes,
-      para->getParD(level)->isEvenTimestep);
-      getLastCudaError("BBStressDevice27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); // grid obtained from thread count and number of BC nodes
+    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    BBStressDevice27<<< grid, threads >>> ( // device data for the requested level comes from para->getParD(level)
+        para->getParD(level)->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ, // third velocity component; velocityY must not be passed twice
+        boundaryCondition->normalX,
+        boundaryCondition->normalY,
+        boundaryCondition->normalZ,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        boundaryCondition->Vx1,
+        boundaryCondition->Vy1,
+        boundaryCondition->Vz1,
+        para->getParD(level)->wallModel.samplingOffset,
+        para->getParD(level)->wallModel.z0,
+        para->getHasWallModelMonitor(),
+        para->getParD(level)->wallModel.u_star,
+        para->getParD(level)->wallModel.Fx,
+        para->getParD(level)->wallModel.Fy,
+        para->getParD(level)->wallModel.Fz,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("BBStressDevice27 execution failed"); // launch-error check tagged with the kernel name
 }
 
 //////////////////////////////////////////////////////////////////////////
 void BBStressPressureDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
 {
-   dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
-
-   BBStressPressureDevice27<<< grid, threads >>> (
-      para->getParD(level)->distributions.f[0],
-      boundaryCondition->k,
-      boundaryCondition->kN,
-      boundaryCondition->q27[0],
-      boundaryCondition->numberOfBCnodes,
-      para->getParD(level)->velocityX,
-      para->getParD(level)->velocityY,
-      para->getParD(level)->velocityY,
-      boundaryCondition->normalX,
-      boundaryCondition->normalY,
-      boundaryCondition->normalZ,
-      boundaryCondition->Vx,
-      boundaryCondition->Vy,
-      boundaryCondition->Vz,
-      boundaryCondition->Vx1,
-      boundaryCondition->Vy1,
-      boundaryCondition->Vz1,
-      para->getParD(level)->wallModel.samplingOffset,
-      para->getParD(level)->wallModel.z0,
-      para->getHasWallModelMonitor(),
-      para->getParD(level)->wallModel.u_star,
-      para->getParD(level)->wallModel.Fx,
-      para->getParD(level)->wallModel.Fy,
-      para->getParD(level)->wallModel.Fz,
-      para->getParD(level)->neighborX,
-      para->getParD(level)->neighborY,
-      para->getParD(level)->neighborZ,
-      para->getParD(level)->numberOfNodes,
-      para->getParD(level)->isEvenTimestep);
-      getLastCudaError("BBStressDevice27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); // grid obtained from thread count and number of BC nodes
+    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    BBStressPressureDevice27<<< grid, threads >>> ( // device data for the requested level comes from para->getParD(level)
+        para->getParD(level)->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ, // third velocity component; velocityY must not be passed twice
+        boundaryCondition->normalX,
+        boundaryCondition->normalY,
+        boundaryCondition->normalZ,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        boundaryCondition->Vx1,
+        boundaryCondition->Vy1,
+        boundaryCondition->Vz1,
+        para->getParD(level)->wallModel.samplingOffset,
+        para->getParD(level)->wallModel.z0,
+        para->getHasWallModelMonitor(),
+        para->getParD(level)->wallModel.u_star,
+        para->getParD(level)->wallModel.Fx,
+        para->getParD(level)->wallModel.Fy,
+        para->getParD(level)->wallModel.Fz,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("BBStressPressureDevice27 execution failed"); // launch-error check tagged with the kernel name
 }
 
 //////////////////////////////////////////////////////////////////////////
 void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressDevice27<<< grid, threads >>> (
-      boundaryCondition->RhoBC,
-      parameterDevice->distributions.f[0],
-      boundaryCondition->k,
-      boundaryCondition->q27[0],
-      boundaryCondition->numberOfBCnodes,
-      parameterDevice->omega,
-      parameterDevice->neighborX,
-      parameterDevice->neighborY,
-      parameterDevice->neighborZ,
-      parameterDevice->numberOfNodes,
-      parameterDevice->isEvenTimestep);
-   getLastCudaError("QPressDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevAntiBB27(  unsigned int numberOfThreads,
-                                    real* rhoBC,
-									real* vx,
-									real* vy,
-									real* vz,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-    QPressDeviceAntiBB27<<< gridQ, threads >>>( rhoBC,
-												vx,
-												vy,
-												vz,
-												DD,
-												k_Q,
-												QQ,
-												numberOfBCnodes,
-												om1,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep);
+    QPressDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPressDevice27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevAntiBB27( // host-side wrapper: builds a launch configuration and starts the QPressDeviceAntiBB27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* rhoBC,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes, // NOTE(review): signed int here, unsigned int in sibling wrappers such as QPressDevFixBackflow27 - confirm this is intended
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    QPressDeviceAntiBB27<<< grid.grid, grid.threads >>>( // all remaining arguments are passed through in declaration order
+        rhoBC,
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("QPressDeviceAntiBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void QPressDevFixBackflow27( unsigned int numberOfThreads,
-                                        real* rhoBC,
-                                        real* DD,
-                                        int* k_Q,
-                                        unsigned int numberOfBCnodes,
-                                        real om1,
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat,
-                                        bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QPressDeviceFixBackflow27<<< gridQ, threads >>> (  rhoBC,
-                                                         DD,
-                                                         k_Q,
-                                                         numberOfBCnodes,
-                                                         om1,
-                                                         neighborX,
-                                                         neighborY,
-                                                         neighborZ,
-                                                         size_Mat,
-                                                         isEvenTimestep);
-      getLastCudaError("QPressDeviceFixBackflow27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevDirDepBot27(  unsigned int numberOfThreads,
-                                       real* rhoBC,
-                                       real* DD,
-                                       int* k_Q,
-                                       unsigned int numberOfBCnodes,
-                                       real om1,
-                                       unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       unsigned int size_Mat,
-                                       bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QPressDeviceDirDepBot27<<< gridQ, threads >>> ( rhoBC,
-                                                      DD,
-                                                      k_Q,
-                                                      numberOfBCnodes,
-                                                      om1,
-                                                      neighborX,
-                                                      neighborY,
-                                                      neighborZ,
-                                                      size_Mat,
-                                                      isEvenTimestep);
-      getLastCudaError("QPressDeviceDirDepBot27 execution failed");
+void QPressDevFixBackflow27( // host-side wrapper: builds a launch configuration and starts the QPressDeviceFixBackflow27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    QPressDeviceFixBackflow27<<< grid.grid, grid.threads >>> ( // all remaining arguments are passed through in declaration order
+        rhoBC,
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceFixBackflow27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - helper is defined outside this view
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevDirDepBot27( // host-side wrapper: builds a launch configuration and starts the QPressDeviceDirDepBot27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    QPressDeviceDirDepBot27<<< grid.grid, grid.threads >>> ( // all remaining arguments are passed through in declaration order
+        rhoBC,
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceDirDepBot27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - helper is defined outside this view
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+
+    QPressNoRhoDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep,
+        vf::lbm::dir::DIR_P00);
+    getLastCudaError("QPressNoRhoDevice27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+{
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressNoRhoDevice27<<< grid, threads >>> (
-         boundaryCondition->RhoBC,
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->kN,
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QPressNoRhoDevice27 execution failed");
+    QPressZeroRhoOutflowDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep,
+        vf::lbm::dir::DIR_P00,
+        parameterDevice->outflowPressureCorrectionFactor);
+    getLastCudaError("QPressZeroRhoOutflowDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QInflowScaleByPressDevice27<<< grid, threads >>> (
-           boundaryCondition->RhoBC,
-           parameterDevice->distributions.f[0],
-           boundaryCondition->k,
-           boundaryCondition->kN,
-           boundaryCondition->numberOfBCnodes,
-           parameterDevice->omega,
-           parameterDevice->neighborX,
-           parameterDevice->neighborY,
-           parameterDevice->neighborZ,
-           parameterDevice->numberOfNodes,
-           parameterDevice->isEvenTimestep);
-   getLastCudaError("QInflowScaleByPressDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevOld27(  unsigned int numberOfThreads,
-                                     real* rhoBC,
-                                     real* DD,
-                                     int* k_Q,
-                                     int* k_N,
-                                     unsigned int numberOfBCnodes,
-                                     real om1,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QPressDeviceOld27<<< gridQ, threads >>> ( rhoBC,
-                                                DD,
-                                                k_Q,
-                                                k_N,
-                                                numberOfBCnodes,
-                                                om1,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("QPressDeviceOld27 execution failed");
+    QInflowScaleByPressDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QInflowScaleByPressDevice27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevOld27( // host-side wrapper: builds a launch configuration and starts the QPressDeviceOld27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    QPressDeviceOld27<<< grid.grid, grid.threads >>> ( // all remaining arguments are passed through in declaration order
+        rhoBC,
+        DD,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceOld27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - helper is defined outside this view
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressDeviceIncompNEQ27<<< grid, threads >>> (
-         boundaryCondition->RhoBC,
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->kN,
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QPressDeviceIncompNEQ27 execution failed");
+    QPressDeviceIncompNEQ27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPressDeviceIncompNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressDeviceNEQ27<<< grid, threads >>> (
+    QPressDeviceNEQ27<<< grid, threads >>> (
         boundaryCondition->RhoBC,
         parameterDevice->distributions.f[0],
         boundaryCondition->k,
@@ -3907,3241 +3105,2743 @@ void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditi
         parameterDevice->neighborZ,
         parameterDevice->numberOfNodes,
         parameterDevice->isEvenTimestep);
-   getLastCudaError("QPressDevNEQ27 execution failed");
+    getLastCudaError("QPressDevNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-      QPressDeviceEQZ27<<< grid, threads >>> (
-            boundaryCondition->RhoBC,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->kN,
-            parameterDevice->kDistTestRE.f[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
-      getLastCudaError("QPressDeviceEQZ27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevZero27(unsigned int numberOfThreads,
-                                real* DD,
-                                int* k_Q,
-                                unsigned int numberOfBCnodes,
-                                unsigned int* neighborX,
-                                unsigned int* neighborY,
-                                unsigned int* neighborZ,
-                                unsigned int size_Mat,
-                                bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QPressDeviceZero27<<< gridQ, threads >>> (DD,
-                                                k_Q,
-                                                numberOfBCnodes,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("QPressDeviceOld27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevFake27(     unsigned int numberOfThreads,
-                                     real* rhoBC,
-                                     real* DD,
-                                     int* k_Q,
-                                     int* k_N,
-                                     unsigned int numberOfBCnodes,
-                                     real om1,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      QPressDeviceFake27<<< gridQ, threads >>> (rhoBC,
-                                                DD,
-                                                k_Q,
-                                                k_N,
-                                                numberOfBCnodes,
-                                                om1,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("QPressDeviceFake27 execution failed");
+    QPressDeviceEQZ27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        parameterDevice->kDistTestRE.f[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPressDeviceEQZ27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevZero27( // host-side wrapper: builds a launch configuration and starts the QPressDeviceZero27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    QPressDeviceZero27<<< grid.grid, grid.threads >>> ( // all remaining arguments are passed through in declaration order
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceZero27 execution failed"); // message names the kernel launched above (previously said QPressDeviceOld27)
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevFake27( // host-side wrapper: builds a launch configuration and starts the QPressDeviceFake27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+
+    QPressDeviceFake27<<< grid.grid, grid.threads >>> ( // all remaining arguments are passed through in declaration order
+        rhoBC,
+        DD,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceFake27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - helper is defined outside this view
 }
 //////////////////////////////////////////////////////////////////////////
 void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   BBDevice27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("BBDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDev27_IntBB(  unsigned int numberOfThreads,
-									real* rho,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
-{
-	int Grid = (numberOfBCnodes / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 gridQ(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-		QPressDevice27_IntBB<<< gridQ, threads >>> (rho,
-													DD,
-													k_Q,
-													QQ,
-													numberOfBCnodes,
-													om1,
-													neighborX,
-													neighborY,
-													neighborZ,
-													size_Mat,
-													isEvenTimestep);
-		getLastCudaError("QPressDevice27_IntBB execution failed");
+    BBDevice27<<< grid, threads >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("BBDevice27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDev27_IntBB( // host-side wrapper: builds a launch configuration and starts the QPressDevice27_IntBB kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* rho,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    QPressDevice27_IntBB<<< grid.grid, grid.threads >>> ( // all remaining arguments are passed through in declaration order
+        rho,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDevice27_IntBB execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - helper is defined outside this view
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-void PressSchlaffer27(unsigned int numberOfThreads,
-                                 real* rhoBC,
-                                 real* DD,
-                                 real* vx0,
-                                 real* vy0,
-                                 real* vz0,
-                                 real* deltaVz0,
-                                 int* k_Q,
-                                 int* k_N,
-                                 int numberOfBCnodes,
-                                 real om1,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int size_Mat,
-                                 bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      PressSchlaff27<<< gridQ, threads >>>(  rhoBC,
-                                             DD,
-                                             vx0,
-                                             vy0,
-                                             vz0,
-                                             deltaVz0,
-                                             k_Q,
-                                             k_N,
-                                             numberOfBCnodes,
-                                             om1,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             isEvenTimestep);
-      getLastCudaError("PressSchlaff27 execution failed");
+void PressSchlaffer27( // host-side wrapper: builds a launch configuration and starts the PressSchlaff27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration; not passed to the kernel
+    real* rhoBC,
+    real* DD,
+    real* vx0,
+    real* vy0,
+    real* vz0,
+    real* deltaVz0,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes, // NOTE(review): signed int here, unsigned int in sibling wrappers such as QPressDev27_IntBB - confirm this is intended
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded unchanged to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    PressSchlaff27<<< grid.grid, grid.threads >>>( // kernel name differs from the wrapper name (PressSchlaff27 vs PressSchlaffer27)
+        rhoBC,
+        DD,
+        vx0,
+        vy0,
+        vz0,
+        deltaVz0,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("PressSchlaff27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - helper is defined outside this view
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-void VelSchlaffer27(  unsigned int numberOfThreads,
-                                 int t,
-                                 real* DD,
-                                 real* vz0,
-                                 real* deltaVz0,
-                                 int* k_Q,
-                                 int* k_N,
-                                 int numberOfBCnodes,
-                                 real om1,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int size_Mat,
-                                 bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      VelSchlaff27<<< gridQ, threads >>>( t,
-                                          DD,
-                                          vz0,
-                                          deltaVz0,
-                                          k_Q,
-                                          k_N,
-                                          numberOfBCnodes,
-                                          om1,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          isEvenTimestep);
-      getLastCudaError("VelSchlaff27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void PropVelo(   unsigned int numberOfThreads,
-                            unsigned int* neighborX,
-                            unsigned int* neighborY,
-                            unsigned int* neighborZ,
-                            real* rho,
-                            real* ux,
-                            real* uy,
-                            real* uz,
-                            int* k_Q,
-							unsigned int size_Prop,
-                            unsigned int size_Mat,
-                            unsigned int* bcMatD,
-                            real* DD,
-                            bool EvenOrOdd)
-{
-   int Grid = (size_Prop / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 grid(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      PropellerBC<<< grid, threads >>>(neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       rho,
-                                       ux,
-                                       uy,
-                                       uz,
-									   k_Q,
-									   size_Prop,
-                                       size_Mat,
-									   bcMatD,
-                                       DD,
-                                       EvenOrOdd);
-      getLastCudaError("PropellerBC execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF27( real* DC,
-                        real* DF,
-                        unsigned int* neighborCX,
-                        unsigned int* neighborCY,
-                        unsigned int* neighborCZ,
-                        unsigned int* neighborFX,
-                        unsigned int* neighborFY,
-                        unsigned int* neighborFZ,
-                        unsigned int size_MatC,
-                        unsigned int size_MatF,
-                        bool isEvenTimestep,
-                        unsigned int* posCSWB,
-                        unsigned int* posFSWB,
-                        unsigned int kCF,
-                        real omCoarse,
-                        real omFine,
-                        real nu,
-                        unsigned int nxC,
-                        unsigned int nyC,
-                        unsigned int nxF,
-                        unsigned int nyF,
-                        unsigned int numberOfThreads)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF27<<< gridINT_CF, threads >>> ( DC,
-                                             DF,
-                                             neighborCX,
-                                             neighborCY,
-                                             neighborCZ,
-                                             neighborFX,
-                                             neighborFY,
-                                             neighborFZ,
-                                             size_MatC,
-                                             size_MatF,
-                                             isEvenTimestep,
-                                             posCSWB,
-                                             posFSWB,
-                                             kCF,
-                                             omCoarse,
-                                             omFine,
-                                             nu,
-                                             nxC,
-                                             nyC,
-                                             nxF,
-                                             nyF);
-      getLastCudaError("scaleCF27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFEff27(real* DC,
-                             real* DF,
-                             unsigned int* neighborCX,
-                             unsigned int* neighborCY,
-                             unsigned int* neighborCZ,
-                             unsigned int* neighborFX,
-                             unsigned int* neighborFY,
-                             unsigned int* neighborFZ,
-                             unsigned int size_MatC,
-                             unsigned int size_MatF,
-                             bool isEvenTimestep,
-                             unsigned int* posCSWB,
-                             unsigned int* posFSWB,
-                             unsigned int kCF,
-                             real omCoarse,
-                             real omFine,
-                             real nu,
-                             unsigned int nxC,
-                             unsigned int nyC,
-                             unsigned int nxF,
-                             unsigned int nyF,
-                             unsigned int numberOfThreads,
-                             OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCFEff27<<< gridINT_CF, threads >>> ( DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCFEff27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFLast27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posCSWB,
-                              unsigned int* posFSWB,
-                              unsigned int kCF,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCFLast27<<< gridINT_CF, threads >>> (DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCFLast27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFpress27(  real* DC,
-                                 real* DF,
-                                 unsigned int* neighborCX,
-                                 unsigned int* neighborCY,
-                                 unsigned int* neighborCZ,
-                                 unsigned int* neighborFX,
-                                 unsigned int* neighborFY,
-                                 unsigned int* neighborFZ,
-                                 unsigned int size_MatC,
-                                 unsigned int size_MatF,
-                                 bool isEvenTimestep,
-                                 unsigned int* posCSWB,
-                                 unsigned int* posFSWB,
-                                 unsigned int kCF,
-                                 real omCoarse,
-                                 real omFine,
-                                 real nu,
-                                 unsigned int nxC,
-                                 unsigned int nyC,
-                                 unsigned int nxF,
-                                 unsigned int nyF,
-                                 unsigned int numberOfThreads,
-                                 OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCFpress27<<< gridINT_CF, threads >>>(DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCFpress27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_Fix_27(  real* DC,
-                                 real* DF,
-                                 unsigned int* neighborCX,
-                                 unsigned int* neighborCY,
-                                 unsigned int* neighborCZ,
-                                 unsigned int* neighborFX,
-                                 unsigned int* neighborFY,
-                                 unsigned int* neighborFZ,
-                                 unsigned int size_MatC,
-                                 unsigned int size_MatF,
-                                 bool isEvenTimestep,
-                                 unsigned int* posCSWB,
-                                 unsigned int* posFSWB,
-                                 unsigned int kCF,
-                                 real omCoarse,
-                                 real omFine,
-                                 real nu,
-                                 unsigned int nxC,
-                                 unsigned int nyC,
-                                 unsigned int nxF,
-                                 unsigned int nyF,
-                                 unsigned int numberOfThreads,
-                                 OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_Fix_27<<< gridINT_CF, threads >>>(DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_Fix_comp_27( real* DC,
-									 real* DF,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_Fix_comp_27<<< gridINT_CF, threads >>>(   DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_0817_comp_27(real* DC,
-									 real* DF,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF,
-                            CUstream_st *stream)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_0817_comp_27<<< gridINT_CF, threads, 0, stream >>>(  DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
-      getLastCudaError("scaleCF_0817_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_comp_D3Q27F3_2018(real* DC,
-										  real* DF,
-										  real* G6,
-										  unsigned int* neighborCX,
-										  unsigned int* neighborCY,
-										  unsigned int* neighborCZ,
-										  unsigned int* neighborFX,
-										  unsigned int* neighborFY,
-										  unsigned int* neighborFZ,
-										  unsigned int size_MatC,
-										  unsigned int size_MatF,
-										  bool isEvenTimestep,
-										  unsigned int* posCSWB,
-										  unsigned int* posFSWB,
-										  unsigned int kCF,
-										  real omCoarse,
-										  real omFine,
-										  real nu,
-										  unsigned int nxC,
-										  unsigned int nyC,
-										  unsigned int nxF,
-										  unsigned int nyF,
-										  unsigned int numberOfThreads,
-										  OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_comp_D3Q27F3_2018 <<< gridINT_CF, threads >>>(DC,
-															DF,
-															G6,
-															neighborCX,
-															neighborCY,
-															neighborCZ,
-															neighborFX,
-															neighborFY,
-															neighborFZ,
-															size_MatC,
-															size_MatF,
-															isEvenTimestep,
-															posCSWB,
-															posFSWB,
-															kCF,
-															omCoarse,
-															omFine,
-															nu,
-															nxC,
-															nyC,
-															nxF,
-															nyF,
-															offCF);
-      getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_comp_D3Q27F3(real* DC,
-									 real* DF,
-									 real* G6,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF,
-                            CUstream_st *stream)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_comp_D3Q27F3 <<< gridINT_CF, threads, 0, stream >>>( DC,
-														DF,
-														G6,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
-      getLastCudaError("scaleCF_comp_D3Q27F3 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_staggered_time_comp_27(  real* DC,
-												 real* DF,
-												 unsigned int* neighborCX,
-												 unsigned int* neighborCY,
-												 unsigned int* neighborCZ,
-												 unsigned int* neighborFX,
-												 unsigned int* neighborFY,
-												 unsigned int* neighborFZ,
-												 unsigned int size_MatC,
-												 unsigned int size_MatF,
-												 bool isEvenTimestep,
-												 unsigned int* posCSWB,
-												 unsigned int* posFSWB,
-												 unsigned int kCF,
-												 real omCoarse,
-												 real omFine,
-												 real nu,
-												 unsigned int nxC,
-												 unsigned int nyC,
-												 unsigned int nxF,
-												 unsigned int nyF,
-												 unsigned int numberOfThreads,
-												 OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_staggered_time_comp_27<<< gridINT_CF, threads >>>(    DC,
-																	DF,
-																	neighborCX,
-																	neighborCY,
-																	neighborCZ,
-																	neighborFX,
-																	neighborFY,
-																	neighborFZ,
-																	size_MatC,
-																	size_MatF,
-																	isEvenTimestep,
-																	posCSWB,
-																	posFSWB,
-																	kCF,
-																	omCoarse,
-																	omFine,
-																	nu,
-																	nxC,
-																	nyC,
-																	nxF,
-																	nyF,
-																	offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
+void VelSchlaffer27( // host-side launcher for the VelSchlaff27 kernel
+    unsigned int numberOfThreads,
+    int t,
+    real* DD,
+    real* vz0,
+    real* deltaVz0,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+    // Launch configuration comes from the CudaGrid above; every parameter except numberOfThreads is forwarded unchanged, in declaration order.
+    VelSchlaff27<<< grid.grid, grid.threads >>>(
+        t,
+        DD,
+        vz0,
+        deltaVz0,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("VelSchlaff27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, // host-side launcher for QPrecursorDeviceCompZeroPress
+                                QforPrecursorBoundaryConditions* boundaryCondition,
+                                real timeRatio,
+                                real velocityRatio)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+    // Kernel arguments are read from the two structs; timeRatio and velocityRatio are passed through as given.
+    QPrecursorDeviceCompZeroPress<<< grid.grid, grid.threads >>>(
+        boundaryCondition->k,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        boundaryCondition->sizeQ,
+        parameterDevice->omega,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->q27[0],
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        boundaryCondition->velocityX,
+        boundaryCondition->velocityY,
+        boundaryCondition->velocityZ,
+        timeRatio,
+        velocityRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPrecursorDeviceCompZeroPress execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice,
+                        QforPrecursorBoundaryConditions* boundaryCondition,
+                        real timeRatio,
+                        real velocityRatio)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+    // Host-side launcher for PrecursorDeviceEQ27: kernel arguments are read from the two structs, the ratios are passed through as given.
+    PrecursorDeviceEQ27<<< grid.grid, grid.threads >>>(
+        boundaryCondition->k,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        parameterDevice->omega,
+        parameterDevice->distributions.f[0],
+        parameterDevice->neighborX,
+        parameterDevice->neighborY, // one neighbor array per axis, as in the sibling precursor launchers
+        parameterDevice->neighborZ,
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        boundaryCondition->velocityX,
+        boundaryCondition->velocityY,
+        boundaryCondition->velocityZ,
+        timeRatio,
+        velocityRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("PrecursorDeviceEQ27 execution failed");
+
+}
+//////////////////////////////////////////////////////////////////////////
+void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, // host-side launcher for PrecursorDeviceDistributions
+                                QforPrecursorBoundaryConditions* boundaryCondition,
+                                real timeRatio,
+                                real velocityRatio) // accepted but not forwarded to the kernel below
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+    // Kernel arguments are read from the two structs; only timeRatio is passed through from the caller.
+    PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(
+        boundaryCondition->k,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        parameterDevice->distributions.f[0],
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        timeRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("PrecursorDeviceDistributions execution failed");
+
+}
+
+//////////////////////////////////////////////////////////////////////////
+void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice,
+                                QforPrecursorBoundaryConditions* boundaryCondition,
+                                real timeRatio,
+                                real velocityRatio) // accepted but not forwarded to the kernel below
+{
+    // Host-side launcher for QPrecursorDeviceDistributions; kernel arguments are read from the two structs.
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+
+    QPrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->sizeQ,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        parameterDevice->distributions.f[0],
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        timeRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPrecursorDeviceDistributions execution failed"); // message names the kernel launched above
+
+}
+//////////////////////////////////////////////////////////////////////////
+extern "C" void PropVelo( // NOTE(review): the neighbouring launchers are defined without extern "C" - confirm this matches the declaration
+    unsigned int numberOfThreads,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rho,
+    real* ux,
+    real* uy,
+    real* uz,
+    int* k_Q,
+    unsigned int size_Prop,
+    unsigned long long numberOfLBnodes,
+    unsigned int* bcMatD,
+    real* DD,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Prop);
+    // Launches the PropellerBC kernel; every parameter except numberOfThreads is forwarded unchanged, in declaration order.
+    PropellerBC<<< grid.grid, grid.threads >>>(
+        neighborX,
+        neighborY,
+        neighborZ,
+        rho,
+        ux,
+        uy,
+        uz,
+        k_Q,
+        size_Prop,
+        numberOfLBnodes,
+        bcMatD,
+        DD,
+        EvenOrOdd);
+    getLastCudaError("PropellerBC execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF27( // host-side launcher for the scaleCF27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
+    // Launch configuration is built from numberOfThreads and kCF; all other parameters are forwarded unchanged, in declaration order.
+    scaleCF27<<< grid.grid, grid.threads >>> (
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF);
+    getLastCudaError("scaleCF27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFEff27( // Host-side launcher for the scaleCFEff27 kernel.
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on the default stream; every parameter except numberOfThreads is forwarded in declaration order.
+    scaleCFEff27<<< grid.grid, grid.threads >>> (
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCFEff27 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFLast27( // Host-side launcher for the scaleCFLast27 kernel.
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on the default stream; every parameter except numberOfThreads is forwarded in declaration order.
+    scaleCFLast27<<< grid.grid, grid.threads >>> (
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCFLast27 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFpress27( // Host-side launcher for the scaleCFpress27 kernel.
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on the default stream; every parameter except numberOfThreads is forwarded in declaration order.
+    scaleCFpress27<<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCFpress27 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_Fix_27( // Host-side launcher for the scaleCF_Fix_27 kernel.
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on the default stream; every parameter except numberOfThreads is forwarded in declaration order.
+    scaleCF_Fix_27<<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCF_Fix_27 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_Fix_comp_27( // Host-side launcher for the scaleCF_Fix_comp_27 kernel.
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on the default stream; every parameter except numberOfThreads is forwarded in declaration order.
+    scaleCF_Fix_comp_27<<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCF_Fix_comp_27 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_0817_comp_27( // Host-side launcher for the scaleCF_0817_comp_27 kernel on a caller-supplied stream.
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF,
+    CUstream_st *stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on `stream` with 0 bytes of dynamic shared memory; all parameters except numberOfThreads and stream are forwarded in declaration order.
+    scaleCF_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>>(
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCF_0817_comp_27 execution failed"); // message names the launched kernel; helper presumably checks the CUDA error state (defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_comp_D3Q27F3_2018( // Host-side launcher for the scaleCF_comp_D3Q27F3_2018 kernel.
+    real* DC,
+    real* DF,
+    real* G6,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on the default stream; every parameter except numberOfThreads is forwarded in declaration order.
+    scaleCF_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_comp_D3Q27F3( // Host-side launcher for the scaleCF_comp_D3Q27F3 kernel on a caller-supplied stream.
+    real* DC,
+    real* DF,
+    real* G6,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF,
+    CUstream_st *stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on `stream` with 0 bytes of dynamic shared memory; all parameters except numberOfThreads and stream are forwarded in declaration order.
+    scaleCF_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>>(
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCF_comp_D3Q27F3 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_staggered_time_comp_27( // Host-side launcher for the scaleCF_staggered_time_comp_27 kernel.
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+    // Launch on the default stream; every parameter except numberOfThreads is forwarded in declaration order.
+    scaleCF_staggered_time_comp_27<<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+    getLastCudaError("scaleCF_staggered_time_comp_27 execution failed"); // presumably checks the CUDA error state after the launch (helper defined elsewhere)
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream)
 {
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellCF->kCF);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleCF_RhoSq_comp_27<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellCF->ICellCFC,
-      icellCF->ICellCFF,
-      icellCF->kCF,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      parameterDeviceC->vis,
-      parameterDeviceC->nx,
-      parameterDeviceC->ny,
-      parameterDeviceF->nx,
-      parameterDeviceF->ny,
-      offsetCF);
-   getLastCudaError("scaleCF_RhoSq_27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellCF->kCF); // grid derived from parameterDeviceC->numberofthreads and icellCF->kCF
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
+    // Launch on `stream` with 0 bytes of dynamic shared memory; kernel arguments are read from the two parameter structs, icellCF and offsetCF.
+    scaleCF_RhoSq_comp_27<<<grid, threads, 0, stream>>>(
+        parameterDeviceC->distributions.f[0],
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep,
+        icellCF->ICellCFC,
+        icellCF->ICellCFF,
+        icellCF->kCF,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        parameterDeviceC->vis,
+        parameterDeviceC->nx,
+        parameterDeviceC->ny,
+        parameterDeviceF->nx,
+        parameterDeviceF->ny,
+        offsetCF);
+    getLastCudaError("scaleCF_RhoSq_comp_27 execution failed"); // message names the launched kernel; helper presumably checks the CUDA error state (defined elsewhere)
 }
 
 void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream)
 {
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellCF->kCF);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleCF_compressible<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellCF->ICellCFC,
-      icellCF->ICellCFF,
-      icellCF->kCF,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      offsetCF);
-   getLastCudaError("scaleCF_compressible execution failed");
-}
-
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
-											 real* DF,
-											 unsigned int* neighborCX,
-											 unsigned int* neighborCY,
-											 unsigned int* neighborCZ,
-											 unsigned int* neighborFX,
-											 unsigned int* neighborFY,
-											 unsigned int* neighborFZ,
-											 unsigned int size_MatC,
-											 unsigned int size_MatF,
-											 bool isEvenTimestep,
-											 unsigned int* posCSWB,
-											 unsigned int* posFSWB,
-											 unsigned int kCF,
-											 real omCoarse,
-											 real omFine,
-											 real nu,
-											 unsigned int nxC,
-											 unsigned int nyC,
-											 unsigned int nxF,
-											 unsigned int nyF,
-											 unsigned int numberOfThreads,
-											 OffCF offCF,
-                                  CUstream_st *stream)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_RhoSq_3rdMom_comp_27<<< gridINT_CF, threads, 0, stream >>>(  DC,
-																DF,
-																neighborCX,
-																neighborCY,
-																neighborCZ,
-																neighborFX,
-																neighborFY,
-																neighborFZ,
-																size_MatC,
-																size_MatF,
-																isEvenTimestep,
-																posCSWB,
-																posFSWB,
-																kCF,
-																omCoarse,
-																omFine,
-																nu,
-																nxC,
-																nyC,
-																nxF,
-																nyF,
-																offCF);
-      getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_AA2016_comp_27(real* DC,
-									   real* DF,
-									   unsigned int* neighborCX,
-									   unsigned int* neighborCY,
-									   unsigned int* neighborCZ,
-									   unsigned int* neighborFX,
-									   unsigned int* neighborFY,
-									   unsigned int* neighborFZ,
-									   unsigned int size_MatC,
-									   unsigned int size_MatF,
-									   bool isEvenTimestep,
-									   unsigned int* posCSWB,
-									   unsigned int* posFSWB,
-									   unsigned int kCF,
-									   real omCoarse,
-									   real omFine,
-									   real nu,
-									   unsigned int nxC,
-									   unsigned int nyC,
-									   unsigned int nxF,
-									   unsigned int nyF,
-									   unsigned int numberOfThreads,
-									   OffCF offCF,
-                              CUstream_st *stream)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_AA2016_comp_27<<< gridINT_CF, threads, 0, stream >>>(DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
-      getLastCudaError("scaleCF_AA2016_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_NSPress_27(  real* DC,
-									 real* DF,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCF_NSPress_27<<< gridINT_CF, threads >>>(DC,
-													DF,
-													neighborCX,
-													neighborCY,
-													neighborCZ,
-													neighborFX,
-													neighborFY,
-													neighborFZ,
-													size_MatC,
-													size_MatF,
-													isEvenTimestep,
-													posCSWB,
-													posFSWB,
-													kCF,
-													omCoarse,
-													omFine,
-													nu,
-													nxC,
-													nyC,
-													nxF,
-													nyF,
-													offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFThSMG7(   real* DC,
-                                 real* DF,
-                                 real* DD7C,
-                                 real* DD7F,
-                                 unsigned int* neighborCX,
-                                 unsigned int* neighborCY,
-                                 unsigned int* neighborCZ,
-                                 unsigned int* neighborFX,
-                                 unsigned int* neighborFY,
-                                 unsigned int* neighborFZ,
-                                 unsigned int size_MatC,
-                                 unsigned int size_MatF,
-                                 bool isEvenTimestep,
-                                 unsigned int* posCSWB,
-                                 unsigned int* posFSWB,
-                                 unsigned int kCF,
-                                 real nu,
-                                 real diffusivity_fine,
-                                 unsigned int numberOfThreads,
-                                 OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCFThSMG7<<< gridINT_CF, threads >>> (DC,
-                                                DF,
-                                                DD7C,
-                                                DD7F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                nu,
-                                                diffusivity_fine,
-                                                offCF);
-      getLastCudaError("scaleCFThSMG7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFThS7(  real* DC,
-                              real* DF,
-                              real* DD7C,
-                              real* DD7F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posCSWB,
-                              unsigned int* posFSWB,
-                              unsigned int kCF,
-                              real nu,
-                              real diffusivity_fine,
-                              unsigned int numberOfThreads)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCFThS7<<< gridINT_CF, threads >>> (  DC,
-                                                DF,
-                                                DD7C,
-                                                DD7F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                nu,
-                                                diffusivity_fine);
-      getLastCudaError("scaleCFThS7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFThS27( real* DC,
-                              real* DF,
-                              real* DD27C,
-                              real* DD27F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posCSWB,
-                              unsigned int* posFSWB,
-                              unsigned int kCF,
-                              real nu,
-                              real diffusivity_fine,
-                              unsigned int numberOfThreads,
-							  OffCF offCF)
-{
-   int Grid = (kCF / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_CF(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleCFThS27<<< gridINT_CF, threads >>> ( DC,
-                                                DF,
-                                                DD27C,
-                                                DD27F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                nu,
-                                                diffusivity_fine,
-										        offCF);
-      getLastCudaError("scaleCFThS27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC27( real* DC,
-                           real* DF,
-                           unsigned int* neighborCX,
-                           unsigned int* neighborCY,
-                           unsigned int* neighborCZ,
-                           unsigned int* neighborFX,
-                           unsigned int* neighborFY,
-                           unsigned int* neighborFZ,
-                           unsigned int size_MatC,
-                           unsigned int size_MatF,
-                           bool isEvenTimestep,
-                           unsigned int* posC,
-                           unsigned int* posFSWB,
-                           unsigned int kFC,
-                           real omCoarse,
-                           real omFine,
-                           real nu,
-                           unsigned int nxC,
-                           unsigned int nyC,
-                           unsigned int nxF,
-                           unsigned int nyF,
-                           unsigned int numberOfThreads)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC27<<< gridINT_FC, threads >>> ( DC,
-                                             DF,
-                                             neighborCX,
-                                             neighborCY,
-                                             neighborCZ,
-                                             neighborFX,
-                                             neighborFY,
-                                             neighborFZ,
-                                             size_MatC,
-                                             size_MatF,
-                                             isEvenTimestep,
-                                             posC,
-                                             posFSWB,
-                                             kFC,
-                                             omCoarse,
-                                             omFine,
-                                             nu,
-                                             nxC,
-                                             nyC,
-                                             nxF,
-                                             nyF);
-      getLastCudaError("scaleFC27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCEff27(real* DC,
-                             real* DF,
-                             unsigned int* neighborCX,
-                             unsigned int* neighborCY,
-                             unsigned int* neighborCZ,
-                             unsigned int* neighborFX,
-                             unsigned int* neighborFY,
-                             unsigned int* neighborFZ,
-                             unsigned int size_MatC,
-                             unsigned int size_MatF,
-                             bool isEvenTimestep,
-                             unsigned int* posC,
-                             unsigned int* posFSWB,
-                             unsigned int kFC,
-                             real omCoarse,
-                             real omFine,
-                             real nu,
-                             unsigned int nxC,
-                             unsigned int nyC,
-                             unsigned int nxF,
-                             unsigned int nyF,
-                             unsigned int numberOfThreads,
-                             OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFCEff27<<< gridINT_FC, threads >>> ( DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offFC);
-      getLastCudaError("scaleFCEff27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCLast27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFCLast27<<< gridINT_FC, threads >>> (DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offFC);
-      getLastCudaError("Kernel execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCpress27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFCpress27<<< gridINT_FC, threads >>> (  DC,
-                                                   DF,
-                                                   neighborCX,
-                                                   neighborCY,
-                                                   neighborCZ,
-                                                   neighborFX,
-                                                   neighborFY,
-                                                   neighborFZ,
-                                                   size_MatC,
-                                                   size_MatF,
-                                                   isEvenTimestep,
-                                                   posC,
-                                                   posFSWB,
-                                                   kFC,
-                                                   omCoarse,
-                                                   omFine,
-                                                   nu,
-                                                   nxC,
-                                                   nyC,
-                                                   nxF,
-                                                   nyF,
-                                                   offFC);
-      getLastCudaError("scaleFCpress27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_Fix_27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC_Fix_27<<< gridINT_FC, threads >>> (  DC,
-                                                   DF,
-                                                   neighborCX,
-                                                   neighborCY,
-                                                   neighborCZ,
-                                                   neighborFX,
-                                                   neighborFY,
-                                                   neighborFZ,
-                                                   size_MatC,
-                                                   size_MatF,
-                                                   isEvenTimestep,
-                                                   posC,
-                                                   posFSWB,
-                                                   kFC,
-                                                   omCoarse,
-                                                   omFine,
-                                                   nu,
-                                                   nxC,
-                                                   nyC,
-                                                   nxF,
-                                                   nyF,
-                                                   offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_Fix_comp_27(  real* DC,
-									  real* DF,
-									  unsigned int* neighborCX,
-									  unsigned int* neighborCY,
-									  unsigned int* neighborCZ,
-									  unsigned int* neighborFX,
-									  unsigned int* neighborFY,
-									  unsigned int* neighborFZ,
-									  unsigned int size_MatC,
-									  unsigned int size_MatF,
-									  bool isEvenTimestep,
-									  unsigned int* posC,
-									  unsigned int* posFSWB,
-									  unsigned int kFC,
-									  real omCoarse,
-									  real omFine,
-									  real nu,
-									  unsigned int nxC,
-									  unsigned int nyC,
-									  unsigned int nxF,
-									  unsigned int nyF,
-									  unsigned int numberOfThreads,
-									  OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC_Fix_comp_27<<< gridINT_FC, threads >>> ( DC,
-													   DF,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_0817_comp_27( real* DC,
-									  real* DF,
-									  unsigned int* neighborCX,
-									  unsigned int* neighborCY,
-									  unsigned int* neighborCZ,
-									  unsigned int* neighborFX,
-									  unsigned int* neighborFY,
-									  unsigned int* neighborFZ,
-									  unsigned int size_MatC,
-									  unsigned int size_MatF,
-									  bool isEvenTimestep,
-									  unsigned int* posC,
-									  unsigned int* posFSWB,
-									  unsigned int kFC,
-									  real omCoarse,
-									  real omFine,
-									  real nu,
-									  unsigned int nxC,
-									  unsigned int nyC,
-									  unsigned int nxF,
-									  unsigned int nyF,
-									  unsigned int numberOfThreads,
-									  OffFC offFC,
-                             CUstream_st *stream)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC_0817_comp_27<<< gridINT_FC, threads, 0, stream >>> (DC,
-													   DF,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
-      getLastCudaError("scaleFC_0817_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_comp_D3Q27F3_2018( real* DC,
-										   real* DF,
-										   real* G6,
-										   unsigned int* neighborCX,
-										   unsigned int* neighborCY,
-										   unsigned int* neighborCZ,
-										   unsigned int* neighborFX,
-										   unsigned int* neighborFY,
-										   unsigned int* neighborFZ,
-										   unsigned int size_MatC,
-										   unsigned int size_MatF,
-										   bool isEvenTimestep,
-										   unsigned int* posC,
-										   unsigned int* posFSWB,
-										   unsigned int kFC,
-										   real omCoarse,
-										   real omFine,
-										   real nu,
-										   unsigned int nxC,
-										   unsigned int nyC,
-										   unsigned int nxF,
-										   unsigned int nyF,
-										   unsigned int numberOfThreads,
-										   OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-     scaleFC_comp_D3Q27F3_2018 <<< gridINT_FC, threads >>> (DC,
-															DF,
-															G6,
-															neighborCX,
-															neighborCY,
-															neighborCZ,
-															neighborFX,
-															neighborFY,
-															neighborFZ,
-															size_MatC,
-															size_MatF,
-															isEvenTimestep,
-															posC,
-															posFSWB,
-															kFC,
-															omCoarse,
-															omFine,
-															nu,
-															nxC,
-															nyC,
-															nxF,
-															nyF,
-															offFC);
-      getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_comp_D3Q27F3( real* DC,
-									  real* DF,
-									  real* G6,
-									  unsigned int* neighborCX,
-									  unsigned int* neighborCY,
-									  unsigned int* neighborCZ,
-									  unsigned int* neighborFX,
-									  unsigned int* neighborFY,
-									  unsigned int* neighborFZ,
-									  unsigned int size_MatC,
-									  unsigned int size_MatF,
-									  bool isEvenTimestep,
-									  unsigned int* posC,
-									  unsigned int* posFSWB,
-									  unsigned int kFC,
-									  real omCoarse,
-									  real omFine,
-									  real nu,
-									  unsigned int nxC,
-									  unsigned int nyC,
-									  unsigned int nxF,
-									  unsigned int nyF,
-									  unsigned int numberOfThreads,
-									  OffFC offFC,
-                             CUstream_st *stream)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-     scaleFC_comp_D3Q27F3 <<< gridINT_FC, threads, 0, stream >>> (DC,
-													   DF,
-													   G6,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
-      getLastCudaError("scaleFC_0817_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_staggered_time_comp_27(   real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  unsigned int numberOfThreads,
-												  OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC_staggered_time_comp_27<<< gridINT_FC, threads >>> (  DC,
-																   DF,
-																   neighborCX,
-																   neighborCY,
-																   neighborCZ,
-																   neighborFX,
-																   neighborFY,
-																   neighborFZ,
-																   size_MatC,
-																   size_MatF,
-																   isEvenTimestep,
-																   posC,
-																   posFSWB,
-																   kFC,
-																   omCoarse,
-																   omFine,
-																   nu,
-																   nxC,
-																   nyC,
-																   nxF,
-																   nyF,
-																   offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellCF->kCF);
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
+
+    scaleCF_compressible<<<grid, threads, 0, stream>>>(
+        parameterDeviceC->distributions.f[0],
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep,
+        icellCF->ICellCFC,
+        icellCF->ICellCFF,
+        icellCF->kCF,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        offsetCF);
+    getLastCudaError("scaleCF_compressible execution failed");
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleCF_RhoSq_3rdMom_comp_27 kernel.
+// numberOfThreads and kCF size the launch through vf::cuda::CudaGrid, and stream
+// selects the CUDA stream; apart from numberOfThreads and stream, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleCF_RhoSq_3rdMom_comp_27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF,
+    CUstream_st *stream)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kCF);
+
+    // Third launch argument 0: no dynamic shared memory is requested.
+    scaleCF_RhoSq_3rdMom_comp_27<<<launchConfig.grid, launchConfig.threads, 0, stream>>>(
+        DC,
+        DF,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleCF_AA2016_comp_27 kernel.
+// numberOfThreads and kCF size the launch through vf::cuda::CudaGrid, and stream
+// selects the CUDA stream; apart from numberOfThreads and stream, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleCF_AA2016_comp_27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF,
+    CUstream_st *stream)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kCF);
+
+    // Third launch argument 0: no dynamic shared memory is requested.
+    scaleCF_AA2016_comp_27<<<launchConfig.grid, launchConfig.threads, 0, stream>>>(
+        DC,
+        DF,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleCF_AA2016_comp_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleCF_NSPress_27 kernel.
+// numberOfThreads and kCF size the launch through vf::cuda::CudaGrid; no stream
+// is named, so the default stream is used. Apart from numberOfThreads, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleCF_NSPress_27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kCF);
+
+    // Only grid and block are given: default shared memory and stream apply.
+    scaleCF_NSPress_27<<<launchConfig.grid, launchConfig.threads>>>(
+        DC,
+        DF,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offCF);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleCF_NSPress_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleCFThSMG7 kernel.
+// numberOfThreads and kCF size the launch through vf::cuda::CudaGrid; no stream
+// is named, so the default stream is used. Apart from numberOfThreads, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleCFThSMG7(
+    real* DC,
+    real* DF,
+    real* DD7C,
+    real* DD7F,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real nu,
+    real diffusivity_fine,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kCF);
+
+    // Only grid and block are given: default shared memory and stream apply.
+    scaleCFThSMG7<<<launchConfig.grid, launchConfig.threads>>>(
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        nu,
+        diffusivity_fine,
+        offCF);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleCFThSMG7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleCFThS7 kernel.
+// numberOfThreads and kCF size the launch through vf::cuda::CudaGrid; no stream
+// is named, so the default stream is used. Apart from numberOfThreads, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleCFThS7(
+    real* DC,
+    real* DF,
+    real* DD7C,
+    real* DD7F,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real nu,
+    real diffusivity_fine,
+    unsigned int numberOfThreads)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kCF);
+
+    // Only grid and block are given: default shared memory and stream apply.
+    scaleCFThS7<<<launchConfig.grid, launchConfig.threads>>>(
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        nu,
+        diffusivity_fine);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleCFThS7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleCFThS27 kernel.
+// numberOfThreads and kCF size the launch through vf::cuda::CudaGrid; no stream
+// is named, so the default stream is used. Apart from numberOfThreads, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleCFThS27(
+    real* DC,
+    real* DF,
+    real* DD27C,
+    real* DD27F,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF,
+    real nu,
+    real diffusivity_fine,
+    unsigned int numberOfThreads,
+    OffCF offCF)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kCF);
+
+    // Only grid and block are given: default shared memory and stream apply.
+    scaleCFThS27<<<launchConfig.grid, launchConfig.threads>>>(
+        DC,
+        DF,
+        DD27C,
+        DD27F,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        nu,
+        diffusivity_fine,
+        offCF);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleCFThS27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleFC27 kernel.
+// numberOfThreads and kFC size the launch through vf::cuda::CudaGrid; no stream
+// is named, so the default stream is used. Apart from numberOfThreads, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleFC27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kFC);
+
+    // Only grid and block are given: default shared memory and stream apply.
+    scaleFC27<<<launchConfig.grid, launchConfig.threads>>>(
+        DC,
+        DF,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleFC27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleFCEff27 kernel.
+// numberOfThreads and kFC size the launch through vf::cuda::CudaGrid; no stream
+// is named, so the default stream is used. Apart from numberOfThreads, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleFCEff27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffFC offFC)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kFC);
+
+    // Only grid and block are given: default shared memory and stream apply.
+    scaleFCEff27<<<launchConfig.grid, launchConfig.threads>>>(
+        DC,
+        DF,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+
+    // The message passed to getLastCudaError names the kernel launched above.
+    getLastCudaError("scaleFCEff27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+// Host-side launcher for the scaleFCLast27 kernel.
+// numberOfThreads and kFC size the launch through vf::cuda::CudaGrid; no stream
+// is named, so the default stream is used. Apart from numberOfThreads, every
+// parameter is handed to the kernel unchanged and in declaration order.
+void ScaleFCLast27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
+    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffFC offFC)
+{
+    // Grid and block dimensions for this launch.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, kFC);
+
+    // Only grid and block are given: default shared memory and stream apply.
+    scaleFCLast27<<<launchConfig.grid, launchConfig.threads>>>(
+        DC,
+        DF,
+        neighborCX, neighborCY, neighborCZ,
+        neighborFX, neighborFY, neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+
+    // Name the kernel in the message so a failed launch can be attributed.
+    getLastCudaError("scaleFCLast27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCpress27( // host wrapper: builds a launch configuration and starts the scaleFCpress27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // forwarded to the kernel and also used for the launch configuration
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // host-side only: goes into the launch configuration, not into the kernel
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // supplies grid.grid / grid.threads for the launch
+
+    scaleFCpress27<<< grid.grid, grid.threads >>> ( // every parameter except numberOfThreads is passed on, in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFCpress27 execution failed"); // called directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_Fix_27( // host wrapper: builds a launch configuration and starts the scaleFC_Fix_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // forwarded to the kernel and also used for the launch configuration
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // host-side only: goes into the launch configuration, not into the kernel
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // supplies grid.grid / grid.threads for the launch
+
+    scaleFC_Fix_27<<< grid.grid, grid.threads >>> ( // every parameter except numberOfThreads is passed on, in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFC_Fix_27 execution failed"); // called directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_Fix_comp_27( // host wrapper: builds a launch configuration and starts the scaleFC_Fix_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // forwarded to the kernel and also used for the launch configuration
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // host-side only: goes into the launch configuration, not into the kernel
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // supplies grid.grid / grid.threads for the launch
+
+    scaleFC_Fix_comp_27<<< grid.grid, grid.threads >>> ( // every parameter except numberOfThreads is passed on, in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFC_Fix_comp_27 execution failed"); // called directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_0817_comp_27( // host wrapper: builds a launch configuration and starts the scaleFC_0817_comp_27 kernel on a given stream
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // forwarded to the kernel and also used for the launch configuration
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // host-side only: goes into the launch configuration, not into the kernel
+    OffFC offFC,
+    CUstream_st *stream) // host-side only: stream argument of the launch below
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // supplies grid.grid / grid.threads for the launch
+
+    scaleFC_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>> ( // 0 = no dynamic shared memory; launch is issued on `stream`
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC); // numberOfThreads and stream are the only parameters not passed to the kernel
+    getLastCudaError("scaleFC_0817_comp_27 execution failed"); // called directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_comp_D3Q27F3_2018( // host wrapper: builds a launch configuration and starts the scaleFC_comp_D3Q27F3_2018 kernel
+    real* DC,
+    real* DF,
+    real* G6, // extra array argument compared with the plain ScaleFC*27 wrappers; forwarded as-is
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // forwarded to the kernel and also used for the launch configuration
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // host-side only: goes into the launch configuration, not into the kernel
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // supplies grid.grid / grid.threads for the launch
+
+    scaleFC_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>> ( // every parameter except numberOfThreads is passed on, in declaration order
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed"); // called directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_comp_D3Q27F3( // host wrapper: builds a launch configuration and starts the scaleFC_comp_D3Q27F3 kernel on a given stream
+    real* DC,
+    real* DF,
+    real* G6, // extra array argument compared with the plain ScaleFC*27 wrappers; forwarded as-is
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // forwarded to the kernel and also used for the launch configuration
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // host-side only: goes into the launch configuration, not into the kernel
+    OffFC offFC,
+    CUstream_st *stream) // host-side only: stream argument of the launch below
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // supplies grid.grid / grid.threads for the launch
+
+    scaleFC_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>> ( // 0 = no dynamic shared memory; launch is issued on `stream`
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC); // numberOfThreads and stream are the only parameters not passed to the kernel
+    getLastCudaError("scaleFC_comp_D3Q27F3 execution failed"); // called directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_staggered_time_comp_27( // host wrapper: builds a launch configuration and starts the scaleFC_staggered_time_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // forwarded to the kernel and also used for the launch configuration
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // host-side only: goes into the launch configuration, not into the kernel
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // supplies grid.grid / grid.threads for the launch
+
+    scaleFC_staggered_time_comp_27<<< grid.grid, grid.threads >>> ( // every parameter except numberOfThreads is passed on, in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFC_staggered_time_comp_27 execution failed"); // called directly after the launch; the message names the kernel
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream)
 {
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellFC->kFC);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleFC_RhoSq_comp_27<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellFC->ICellFCC,
-      icellFC->ICellFCF,
-      icellFC->kFC,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      parameterDeviceC->vis,
-      parameterDeviceC->nx,
-      parameterDeviceC->ny,
-      parameterDeviceF->nx,
-      parameterDeviceF->ny,
-      offsetFC);
-   getLastCudaError("scaleFC_RhoSq_27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellFC->kFC); // grid dimensions from parameterDeviceC's thread count and kFC
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    scaleFC_RhoSq_comp_27<<<grid, threads, 0, stream>>>( // 0 = no dynamic shared memory; launch is issued on `stream`
+        parameterDeviceC->distributions.f[0], // kernel arguments are read from the two parameter structs and icellFC
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep, // timestep parity is taken from parameterDeviceC only
+        icellFC->ICellFCC,
+        icellFC->ICellFCF,
+        icellFC->kFC,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        parameterDeviceC->vis, // viscosity is taken from parameterDeviceC only
+        parameterDeviceC->nx,
+        parameterDeviceC->ny,
+        parameterDeviceF->nx,
+        parameterDeviceF->ny,
+        offsetFC);
+    getLastCudaError("scaleFC_RhoSq_comp_27 execution failed"); // called directly after the launch; the message names the kernel
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream)
 {
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellFC->kFC);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleFC_compressible<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellFC->ICellFCC,
-      icellFC->ICellFCF,
-      icellFC->kFC,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      offsetFC);
-   getLastCudaError("scaleFC_compressible execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
-											  real* DF,
-											  unsigned int* neighborCX,
-											  unsigned int* neighborCY,
-											  unsigned int* neighborCZ,
-											  unsigned int* neighborFX,
-											  unsigned int* neighborFY,
-											  unsigned int* neighborFZ,
-											  unsigned int size_MatC,
-											  unsigned int size_MatF,
-											  bool isEvenTimestep,
-											  unsigned int* posC,
-											  unsigned int* posFSWB,
-											  unsigned int kFC,
-											  real omCoarse,
-											  real omFine,
-											  real nu,
-											  unsigned int nxC,
-											  unsigned int nyC,
-											  unsigned int nxF,
-											  unsigned int nyF,
-											  unsigned int numberOfThreads,
-											  OffFC offFC,
-                                   CUstream_st *stream)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC_RhoSq_3rdMom_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC,
-															  DF,
-															  neighborCX,
-															  neighborCY,
-															  neighborCZ,
-															  neighborFX,
-															  neighborFY,
-															  neighborFZ,
-															  size_MatC,
-															  size_MatF,
-															  isEvenTimestep,
-															  posC,
-															  posFSWB,
-															  kFC,
-															  omCoarse,
-															  omFine,
-															  nu,
-															  nxC,
-															  nyC,
-															  nxF,
-															  nyF,
-															  offFC);
-      getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_AA2016_comp_27( real* DC,
-										real* DF,
-										unsigned int* neighborCX,
-										unsigned int* neighborCY,
-										unsigned int* neighborCZ,
-										unsigned int* neighborFX,
-										unsigned int* neighborFY,
-										unsigned int* neighborFZ,
-										unsigned int size_MatC,
-										unsigned int size_MatF,
-										bool isEvenTimestep,
-										unsigned int* posC,
-										unsigned int* posFSWB,
-										unsigned int kFC,
-										real omCoarse,
-										real omFine,
-										real nu,
-										unsigned int nxC,
-										unsigned int nyC,
-										unsigned int nxF,
-										unsigned int nyF,
-										unsigned int numberOfThreads,
-										OffFC offFC,
-                              CUstream_st *stream)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC_AA2016_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posC,
-														posFSWB,
-														kFC,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offFC);
-      getLastCudaError("scaleFC_AA2016_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_NSPress_27(real* DC,
-								  real* DF,
-								  unsigned int* neighborCX,
-								  unsigned int* neighborCY,
-								  unsigned int* neighborCZ,
-								  unsigned int* neighborFX,
-								  unsigned int* neighborFY,
-								  unsigned int* neighborFZ,
-								  unsigned int size_MatC,
-								  unsigned int size_MatF,
-								  bool isEvenTimestep,
-								  unsigned int* posC,
-								  unsigned int* posFSWB,
-								  unsigned int kFC,
-								  real omCoarse,
-								  real omFine,
-								  real nu,
-								  unsigned int nxC,
-								  unsigned int nyC,
-								  unsigned int nxF,
-								  unsigned int nyF,
-								  unsigned int numberOfThreads,
-								  OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFC_NSPress_27<<< gridINT_FC, threads >>> (  DC,
-													   DF,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCThSMG7(real* DC,
-                              real* DF,
-                              real* DD7C,
-                              real* DD7F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real nu,
-                              real diffusivity_coarse,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFCThSMG7<<< gridINT_FC, threads >>>( DC,
-                                                DF,
-                                                DD7C,
-                                                DD7F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                nu,
-                                                diffusivity_coarse,
-                                                offFC);
-      getLastCudaError("scaleFCThSMG7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCThS7(  real* DC,
-                              real* DF,
-                              real* DD7C,
-                              real* DD7F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real nu,
-                              real diffusivity_coarse,
-                              unsigned int numberOfThreads)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFCThS7<<< gridINT_FC, threads >>>(DC,
-                                             DF,
-                                             DD7C,
-                                             DD7F,
-                                             neighborCX,
-                                             neighborCY,
-                                             neighborCZ,
-                                             neighborFX,
-                                             neighborFY,
-                                             neighborFZ,
-                                             size_MatC,
-                                             size_MatF,
-                                             isEvenTimestep,
-                                             posC,
-                                             posFSWB,
-                                             kFC,
-                                             nu,
-                                             diffusivity_coarse);
-      getLastCudaError("scaleFCThS7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCThS27( real* DC,
-                              real* DF,
-                              real* DD27C,
-                              real* DD27F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real nu,
-                              real diffusivity_coarse,
-                              unsigned int numberOfThreads,
-							  OffFC offFC)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      scaleFCThS27<<< gridINT_FC, threads >>>(  DC,
-                                                DF,
-                                                DD27C,
-                                                DD27F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                nu,
-                                                diffusivity_coarse,
-												offFC);
-      getLastCudaError("scaleFCThS27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void DragLiftPostD27(real* DD,
-								int* k_Q,
-								real* QQ,
-								int numberOfBCnodes,
-								double *DragX,
-								double *DragY,
-								double *DragZ,
-								unsigned int* neighborX,
-								unsigned int* neighborY,
-								unsigned int* neighborZ,
-								unsigned int size_Mat,
-								bool isEvenTimestep,
-								unsigned int numberOfThreads)
-{
-	int Grid = (numberOfBCnodes / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	DragLiftPost27<<< grid, threads >>>(DD,
-										k_Q,
-										QQ,
-										numberOfBCnodes,
-										DragX,
-										DragY,
-										DragZ,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("DragLift27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void DragLiftPreD27( real* DD,
-								int* k_Q,
-								real* QQ,
-								int numberOfBCnodes,
-								double *DragX,
-								double *DragY,
-								double *DragZ,
-								unsigned int* neighborX,
-								unsigned int* neighborY,
-								unsigned int* neighborZ,
-								unsigned int size_Mat,
-								bool isEvenTimestep,
-								unsigned int numberOfThreads)
-{
-	int Grid = (numberOfBCnodes / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	DragLiftPre27<<< grid, threads >>>( DD,
-										k_Q,
-										QQ,
-										numberOfBCnodes,
-										DragX,
-										DragY,
-										DragZ,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("DragLift27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcCPtop27(real* DD,
-							int* cpIndex,
-							int nonCp,
-							double *cpPress,
-							unsigned int* neighborX,
-							unsigned int* neighborY,
-							unsigned int* neighborZ,
-							unsigned int size_Mat,
-							bool isEvenTimestep,
-							unsigned int numberOfThreads)
-{
-	int Grid = (nonCp / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	CalcCP27<<< grid, threads >>>(DD,
-								  cpIndex,
-								  nonCp,
-								  cpPress,
-								  neighborX,
-								  neighborY,
-								  neighborZ,
-								  size_Mat,
-								  isEvenTimestep);
-	getLastCudaError("CalcCP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcCPbottom27( real* DD,
-								int* cpIndex,
-								int nonCp,
-								double *cpPress,
-								unsigned int* neighborX,
-								unsigned int* neighborY,
-								unsigned int* neighborZ,
-								unsigned int size_Mat,
-								bool isEvenTimestep,
-								unsigned int numberOfThreads)
-{
-	int Grid = (nonCp / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	CalcCP27<<< grid, threads >>>(DD,
-								  cpIndex,
-								  nonCp,
-								  cpPress,
-								  neighborX,
-								  neighborY,
-								  neighborZ,
-								  size_Mat,
-								  isEvenTimestep);
-	getLastCudaError("CalcCP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void GetSendFsPreDev27(real* DD,
-								  real* bufferFs,
-								  int* sendIndex,
-								  int buffmax,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  unsigned int size_Mat,
-								  bool isEvenTimestep,
-								  unsigned int numberOfThreads,
-								  cudaStream_t stream)
-{
-	int Grid = (buffmax / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	getSendFsPre27<<< grid, threads, 0, stream >>>(DD,
-										bufferFs,
-										sendIndex,
-										buffmax,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("getSendFsPre27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void GetSendFsPostDev27(real* DD,
-								   real* bufferFs,
-								   int* sendIndex,
-								   int buffmax,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep,
-								   unsigned int numberOfThreads,
-								   cudaStream_t stream)
-{
-	int Grid = (buffmax / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	getSendFsPost27<<< grid, threads, 0, stream >>>(DD,
-										 bufferFs,
-										 sendIndex,
-										 buffmax,
-										 neighborX,
-										 neighborY,
-										 neighborZ,
-										 size_Mat,
-										 isEvenTimestep);
-	getLastCudaError("getSendFsPost27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void SetRecvFsPreDev27(real* DD,
-								  real* bufferFs,
-								  int* recvIndex,
-								  int buffmax,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  unsigned int size_Mat,
-								  bool isEvenTimestep,
-								  unsigned int numberOfThreads,
-	                              cudaStream_t stream)
-{
-	int Grid = (buffmax / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	setRecvFsPre27<<< grid, threads, 0, stream >>>(DD,
-										bufferFs,
-										recvIndex,
-										buffmax,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("setRecvFsPre27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void SetRecvFsPostDev27(real* DD,
-								   real* bufferFs,
-								   int* recvIndex,
-								   int buffmax,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep,
-	                               unsigned int numberOfThreads,
-	                               cudaStream_t stream)
-{
-	int Grid = (buffmax / numberOfThreads)+1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid/Grid1)+1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1 );
-
-	setRecvFsPost27<<< grid, threads, 0, stream >>>(DD,
-										 bufferFs,
-										 recvIndex,
-										 buffmax,
-										 neighborX,
-										 neighborY,
-										 neighborZ,
-										 size_Mat,
-										 isEvenTimestep);
-	getLastCudaError("setRecvFsPost27 execution failed");
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellFC->kFC);
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
+
+    scaleFC_compressible<<<grid, threads, 0, stream>>>(
+        parameterDeviceC->distributions.f[0],
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep,
+        icellFC->ICellFCC,
+        icellFC->ICellFCF,
+        icellFC->kFC,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        offsetFC);
+    getLastCudaError("scaleFC_compressible execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_RhoSq_3rdMom_comp_27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffFC offFC,
+    CUstream_st *stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
+
+    scaleFC_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>(
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_AA2016_comp_27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffFC offFC,
+    CUstream_st *stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
+
+    scaleFC_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>(
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFC_AA2016_comp_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_NSPress_27(
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads,
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
+
+    scaleFC_NSPress_27<<< grid.grid, grid.threads >>> (
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        offFC);
+    getLastCudaError("scaleFC_NSPress_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCThSMG7(
+    real* DC,
+    real* DF,
+    real* DD7C,
+    real* DD7F,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real nu,
+    real diffusivity_coarse,
+    unsigned int numberOfThreads,
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
+
+    scaleFCThSMG7<<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        nu,
+        diffusivity_coarse,
+        offFC);
+    getLastCudaError("scaleFCThSMG7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCThS7(
+    real* DC,
+    real* DF,
+    real* DD7C,
+    real* DD7F,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real nu,
+    real diffusivity_coarse,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
+
+    scaleFCThS7<<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        nu,
+        diffusivity_coarse);
+    getLastCudaError("scaleFCThS7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCThS27(
+    real* DC,
+    real* DF,
+    real* DD27C,
+    real* DD27F,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC,
+    real nu,
+    real diffusivity_coarse,
+    unsigned int numberOfThreads,
+    OffFC offFC)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
+
+    scaleFCThS27<<< grid.grid, grid.threads >>>(
+        DC,
+        DF,
+        DD27C,
+        DD27F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        nu,
+        diffusivity_coarse,
+        offFC);
+    getLastCudaError("scaleFCThS27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void DragLiftPostD27(
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes,
+    double *DragX,
+    double *DragY,
+    double *DragZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+
+    DragLiftPost27<<< grid.grid, grid.threads >>>(
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        DragX,
+        DragY,
+        DragZ,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("DragLiftPost27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void DragLiftPreD27(
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes,
+    double *DragX,
+    double *DragY,
+    double *DragZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+
+    DragLiftPre27<<< grid.grid, grid.threads >>>(
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        DragX,
+        DragY,
+        DragZ,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("DragLiftPre27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcCPtop27(
+    real* DD,
+    int* cpIndex,
+    int nonCp,
+    double *cpPress,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
+
+    CalcCP27<<< grid.grid, grid.threads >>>(
+        DD,
+        cpIndex,
+        nonCp,
+        cpPress,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("CalcCP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcCPbottom27(
+    real* DD,
+    int* cpIndex,
+    int nonCp,
+    double *cpPress,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
+
+    CalcCP27<<< grid.grid, grid.threads >>>(
+        DD,
+        cpIndex,
+        nonCp,
+        cpPress,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("CalcCP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void GetSendFsPreDev27(
+    real* DD,
+    real* bufferFs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    getSendFsPre27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        sendIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("getSendFsPre27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void GetSendFsPostDev27(
+    real* DD,
+    real* bufferFs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    getSendFsPost27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        sendIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("getSendFsPost27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void SetRecvFsPreDev27(
+    real* DD,
+    real* bufferFs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    setRecvFsPre27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        recvIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("setRecvFsPre27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void SetRecvFsPostDev27(
+    real* DD,
+    real* bufferFs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    setRecvFsPost27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        recvIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("setRecvFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void getSendGsDevF3(
-	real* G6,
-	real* bufferGs,
-	int* sendIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep,
-	unsigned int numberOfThreads)
-{
-	int Grid = (buffmax / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	getSendGsF3 <<< grid, threads >>> (
-		G6,
-		bufferGs,
-		sendIndex,
-		buffmax,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("getSendGsF3 execution failed");
+    real* G6,
+    real* bufferGs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    getSendGsF3 <<< grid.grid, grid.threads >>> (
+        G6,
+        bufferGs,
+        sendIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("getSendGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void setRecvGsDevF3(
-	real* G6,
-	real* bufferGs,
-	int* recvIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep,
-	unsigned int numberOfThreads)
-{
-	int Grid = (buffmax / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	setRecvGsF3 <<< grid, threads >>> (
-		G6,
-		bufferGs,
-		recvIndex,
-		buffmax,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("setRecvGsF3 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void WallFuncDev27(unsigned int numberOfThreads,
-							  real* vx,
-							  real* vy,
-							  real* vz,
-							  real* DD,
-							  int* k_Q,
-							  real* QQ,
-							  unsigned int numberOfBCnodes,
-							  real om1,
-							  unsigned int* neighborX,
-							  unsigned int* neighborY,
-							  unsigned int* neighborZ,
-							  unsigned int size_Mat,
-							  bool isEvenTimestep)
-{
-   int Grid = (numberOfBCnodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      WallFunction27<<< gridQ, threads >>> (
-											  vx,
-											  vy,
-											  vz,
-											  DD,
-											  k_Q,
-											  QQ,
-											  numberOfBCnodes,
-											  om1,
-											  neighborX,
-											  neighborY,
-											  neighborZ,
-											  size_Mat,
-											  isEvenTimestep);
-      getLastCudaError("WallFunction27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
-										  real* vxD,
-										  real* vyD,
-										  real* vzD,
-										  real* vxWall,
-										  real* vyWall,
-										  real* vzWall,
-										  int numberOfWallNodes,
-										  int* kWallNodes,
-										  real* rhoD,
-										  real* pressD,
-										  unsigned int* geoD,
-										  unsigned int* neighborX,
-										  unsigned int* neighborY,
-										  unsigned int* neighborZ,
-										  unsigned int size_Mat,
-										  real* DD,
-										  bool isEvenTimestep)
-{
-   int Grid = (numberOfWallNodes / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      LBSetOutputWallVelocitySP27<<< gridQ, threads >>> (	vxD,
-															vyD,
-															vzD,
-															vxWall,
-															vyWall,
-															vzWall,
-															numberOfWallNodes,
-															kWallNodes,
-															rhoD,
-															pressD,
-															geoD,
-															neighborX,
-															neighborY,
-															neighborZ,
-															size_Mat,
-															DD,
-															isEvenTimestep);
-      getLastCudaError("LBSetOutputWallVelocitySP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void GetVelotoForce27(unsigned int numberOfThreads,
-								 real* DD,
-								 int* bcIndex,
-								 int nonAtBC,
-								 real* Vx,
-								 real* Vy,
-								 real* Vz,
-								 unsigned int* neighborX,
-								 unsigned int* neighborY,
-								 unsigned int* neighborZ,
-								 unsigned int size_Mat,
-								 bool isEvenTimestep)
-{
-   int Grid = (nonAtBC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-      GetVeloforForcing27<<< gridQ, threads >>> (DD,
-												bcIndex,
-												nonAtBC,
-												Vx,
-												Vy,
-												Vz,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep);
-      getLastCudaError("GetVeloforForcing27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitParticlesDevice(real* coordX,
-									real* coordY,
-									real* coordZ,
-									real* coordParticleXlocal,
-									real* coordParticleYlocal,
-									real* coordParticleZlocal,
-									real* coordParticleXglobal,
-									real* coordParticleYglobal,
-									real* coordParticleZglobal,
-									real* veloParticleX,
-									real* veloParticleY,
-									real* veloParticleZ,
-									real* randArray,
-									unsigned int* particleID,
-									unsigned int* cellBaseID,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int* neighborWSB,
-							        int level,
-									unsigned int numberOfParticles,
-									unsigned int size_Mat,
-									unsigned int numberOfThreads)
-{
-   int Grid = (numberOfParticles / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   InitParticles<<< gridQ, threads >>> (coordX,
-										coordY,
-										coordZ,
-										coordParticleXlocal,
-										coordParticleYlocal,
-										coordParticleZlocal,
-										coordParticleXglobal,
-										coordParticleYglobal,
-										coordParticleZglobal,
-										veloParticleX,
-										veloParticleY,
-										veloParticleZ,
-										randArray,
-										particleID,
-										cellBaseID,
-										bcMatD,
-										neighborX,
-										neighborY,
-										neighborZ,
-										neighborWSB,
-										level,
-										numberOfParticles,
-										size_Mat);
-      getLastCudaError("InitParticles execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void MoveParticlesDevice(real* coordX,
-									real* coordY,
-									real* coordZ,
-									real* coordParticleXlocal,
-									real* coordParticleYlocal,
-									real* coordParticleZlocal,
-									real* coordParticleXglobal,
-									real* coordParticleYglobal,
-									real* coordParticleZglobal,
-									real* veloParticleX,
-									real* veloParticleY,
-									real* veloParticleZ,
-									real* DD,
-									real  omega,
-									unsigned int* particleID,
-									unsigned int* cellBaseID,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int* neighborWSB,
-							        int level,
-									unsigned int timestep,
-									unsigned int numberOfTimesteps,
-									unsigned int numberOfParticles,
-									unsigned int size_Mat,
-									unsigned int numberOfThreads,
-									bool isEvenTimestep)
-{
-   int Grid = (numberOfParticles / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   MoveParticles<<< gridQ, threads >>> (coordX,
-										coordY,
-										coordZ,
-										coordParticleXlocal,
-										coordParticleYlocal,
-										coordParticleZlocal,
-										coordParticleXglobal,
-										coordParticleYglobal,
-										coordParticleZglobal,
-										veloParticleX,
-										veloParticleY,
-										veloParticleZ,
-										DD,
-										omega,
-										particleID,
-										cellBaseID,
-										bcMatD,
-										neighborX,
-										neighborY,
-										neighborZ,
-										neighborWSB,
-										level,
-										timestep,
-										numberOfTimesteps,
-										numberOfParticles,
-										size_Mat,
-										isEvenTimestep);
-      getLastCudaError("MoveParticles execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void initRandomDevice(curandState* state,
-								 unsigned int size_Mat,
-								 unsigned int numberOfThreads)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   initRandom<<< gridQ, threads >>> (state);
-   getLastCudaError("initRandom execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void generateRandomValuesDevice( curandState* state,
-											unsigned int size_Mat,
-											real* randArray,
-											unsigned int numberOfThreads)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   generateRandomValues<<< gridQ, threads >>> (state,randArray);
-   getLastCudaError("generateRandomValues execution failed");
+    real* G6,
+    real* bufferGs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    setRecvGsF3 <<< grid.grid, grid.threads >>> (
+        G6,
+        bufferGs,
+        recvIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("setRecvGsF3 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void WallFuncDev27(
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+
+    WallFunction27<<< grid.grid, grid.threads >>> (
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("WallFunction27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void SetOutputWallVelocitySP27(
+    unsigned int numberOfThreads,
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* vxWall,
+    real* vyWall,
+    real* vzWall,
+    int numberOfWallNodes,
+    int* kWallNodes,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfWallNodes);
+
+    LBSetOutputWallVelocitySP27<<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        vxWall,
+        vyWall,
+        vzWall,
+        numberOfWallNodes,
+        kWallNodes,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBSetOutputWallVelocitySP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void GetVelotoForce27(
+    unsigned int numberOfThreads,
+    real* DD,
+    int* bcIndex,
+    int nonAtBC,
+    real* Vx,
+    real* Vy,
+    real* Vz,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonAtBC);
+
+    GetVeloforForcing27<<< grid.grid, grid.threads >>> (
+        DD,
+        bcIndex,
+        nonAtBC,
+        Vx,
+        Vy,
+        Vz,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("GetVeloforForcing27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void InitParticlesDevice( // Host-side wrapper that launches the InitParticles kernel.
+    real* coordX, // coordX/Y/Z are forwarded unchanged to the kernel
+    real* coordY,
+    real* coordZ,
+    real* coordParticleXlocal, // local particle coordinates (per the names); forwarded unchanged
+    real* coordParticleYlocal,
+    real* coordParticleZlocal,
+    real* coordParticleXglobal, // global particle coordinates (per the names); forwarded unchanged
+    real* coordParticleYglobal,
+    real* coordParticleZglobal,
+    real* veloParticleX, // veloParticleX/Y/Z are forwarded unchanged to the kernel
+    real* veloParticleY,
+    real* veloParticleZ,
+    real* randArray, // forwarded unchanged; presumably pre-generated random values - TODO confirm
+    unsigned int* particleID,
+    unsigned int* cellBaseID,
+    unsigned int* bcMatD,
+    unsigned int* neighborX, // neighborX/Y/Z/WSB are forwarded unchanged to the kernel
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    int level,
+    unsigned int numberOfParticles, // sizes the launch (second CudaGrid argument) and is also forwarded to the kernel
+    unsigned long long numberOfLBnodes, // forwarded to the kernel; not used for the launch size here
+    unsigned int numberOfThreads) // used only to build the launch configuration; not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); // launch configuration derived from the thread count and the particle count
+
+    InitParticles<<< grid.grid, grid.threads >>> ( // grid.grid and grid.threads are the grid and block dimensions of the launch
+        coordX,
+        coordY,
+        coordZ,
+        coordParticleXlocal,
+        coordParticleYlocal,
+        coordParticleZlocal,
+        coordParticleXglobal,
+        coordParticleYglobal,
+        coordParticleZglobal,
+        veloParticleX,
+        veloParticleY,
+        veloParticleZ,
+        randArray,
+        particleID,
+        cellBaseID,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        level,
+        numberOfParticles,
+        numberOfLBnodes);
+    getLastCudaError("InitParticles execution failed"); // error check directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void MoveParticlesDevice( // Host-side wrapper that launches the MoveParticles kernel.
+    real* coordX, // coordX/Y/Z are forwarded unchanged to the kernel
+    real* coordY,
+    real* coordZ,
+    real* coordParticleXlocal, // local particle coordinates (per the names); forwarded unchanged
+    real* coordParticleYlocal,
+    real* coordParticleZlocal,
+    real* coordParticleXglobal, // global particle coordinates (per the names); forwarded unchanged
+    real* coordParticleYglobal,
+    real* coordParticleZglobal,
+    real* veloParticleX, // veloParticleX/Y/Z are forwarded unchanged to the kernel
+    real* veloParticleY,
+    real* veloParticleZ,
+    real* DD, // forwarded unchanged; presumably the distribution array - TODO confirm
+    real  omega, // passed by value to the kernel
+    unsigned int* particleID,
+    unsigned int* cellBaseID,
+    unsigned int* bcMatD,
+    unsigned int* neighborX, // neighborX/Y/Z/WSB are forwarded unchanged to the kernel
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    int level,
+    unsigned int timestep,
+    unsigned int numberOfTimesteps,
+    unsigned int numberOfParticles, // sizes the launch (second CudaGrid argument) and is also forwarded to the kernel
+    unsigned long long numberOfLBnodes, // forwarded to the kernel; not used for the launch size here
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    bool isEvenTimestep) // forwarded unchanged to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); // launch configuration derived from the thread count and the particle count
+
+    MoveParticles<<< grid.grid, grid.threads >>> ( // grid.grid and grid.threads are the grid and block dimensions of the launch
+        coordX,
+        coordY,
+        coordZ,
+        coordParticleXlocal,
+        coordParticleYlocal,
+        coordParticleZlocal,
+        coordParticleXglobal,
+        coordParticleYglobal,
+        coordParticleZglobal,
+        veloParticleX,
+        veloParticleY,
+        veloParticleZ,
+        DD,
+        omega,
+        particleID,
+        cellBaseID,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        level,
+        timestep,
+        numberOfTimesteps,
+        numberOfParticles,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("MoveParticles execution failed"); // error check directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void initRandomDevice( // Host-side wrapper that launches the initRandom kernel on the given curandState array.
+    curandState* state, // the only argument forwarded to the kernel
+    unsigned long long numberOfLBnodes, // used only to size the launch; the kernel does not receive it
+    unsigned int numberOfThreads) // used only to build the launch configuration
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // NOTE(review): 64-bit count passed here - confirm CudaGrid does not narrow it
+    initRandom<<< grid.grid, grid.threads >>> (state); // NOTE(review): no element count is passed - confirm state covers every launched thread
+    getLastCudaError("initRandom execution failed"); // error check directly after the launch; the message names the kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void generateRandomValuesDevice( // Host-side wrapper that launches the generateRandomValues kernel.
+    curandState* state, // forwarded unchanged to the kernel
+    unsigned long long numberOfLBnodes, // used only to size the launch; the kernel does not receive it
+    real* randArray, // forwarded unchanged; presumably the output buffer for the random values - TODO confirm
+    unsigned int numberOfThreads) // used only to build the launch configuration
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // NOTE(review): 64-bit count passed here - confirm CudaGrid does not narrow it
+    generateRandomValues<<< grid.grid, grid.threads >>> (state,randArray); // NOTE(review): no element count is passed - confirm both arrays cover every launched thread
+    getLastCudaError("generateRandomValues execution failed"); // error check directly after the launch; the message names the kernel
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcTurbulenceIntensityDevice(
-   real* vxx,
-   real* vyy,
-   real* vzz,
-   real* vxy,
-   real* vxz,
-   real* vyz,
-   real* vx_mean,
-   real* vy_mean,
-   real* vz_mean,
-   real* DD,
-   uint* typeOfGridNode,
-   unsigned int* neighborX,
-   unsigned int* neighborY,
-   unsigned int* neighborZ,
-   unsigned int size_Mat,
-   bool isEvenTimestep,
-   uint numberOfThreads)
-{
-   int Grid = (size_Mat / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
-
-   CalcTurbulenceIntensity<<<gridQ, threads>>>(
-     vxx,
-     vyy,
-     vzz,
-	 vxy,
-     vxz,
-     vyz,
-     vx_mean,
-     vy_mean,
-     vz_mean,
-     DD,
-     typeOfGridNode,
-     neighborX,
-     neighborY,
-     neighborZ,
-     size_Mat,
-     isEvenTimestep);
-
-   getLastCudaError("CalcTurbulenceIntensity execution failed");
+    real* vxx, // Host-side wrapper that launches the CalcTurbulenceIntensity kernel; vxx..vyz are forwarded unchanged
+    real* vyy,
+    real* vzz,
+    real* vxy,
+    real* vxz,
+    real* vyz,
+    real* vx_mean, // vx_mean/vy_mean/vz_mean are forwarded unchanged to the kernel
+    real* vy_mean,
+    real* vz_mean,
+    real* DD, // forwarded unchanged; presumably the distribution array - TODO confirm
+    uint* typeOfGridNode, // forwarded unchanged to the kernel
+    unsigned int* neighborX, // neighborX/Y/Z are forwarded unchanged to the kernel
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // sizes the launch (second CudaGrid argument) and is also forwarded to the kernel
+    bool isEvenTimestep, // forwarded unchanged to the kernel
+    uint numberOfThreads) // used only to build the launch configuration; not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // NOTE(review): 64-bit count passed here - confirm CudaGrid does not narrow it
+    CalcTurbulenceIntensity<<<grid.grid, grid.threads>>>( // grid.grid and grid.threads are the grid and block dimensions of the launch
+        vxx,
+        vyy,
+        vzz,
+        vxy,
+        vxz,
+        vyz,
+        vx_mean,
+        vy_mean,
+        vz_mean,
+        DD,
+        typeOfGridNode,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("CalcTurbulenceIntensity execution failed"); // error check directly after the launch; the message names the kernel
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
index 314687c4b29a32962b386d7c083f72b754388e5b..79dedee58afb7b11c4c3ede9911f54df65cf859f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
@@ -1,92 +1,117 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//////////////////////////////////////////////////////////////////////////
-/* Device code */
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlipBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
 #include <lbm/constants/NumericConstants.h>
-#include "KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QDevice3rdMomentsComp27(
-													 real* distributions, 
-													 int* subgridDistanceIndices, 
-													 real* subgridDistances,
-													 unsigned int numberOfBCnodes, 
-													 real omega, 
-													 unsigned int* neighborX,
-													 unsigned int* neighborY,
-													 unsigned int* neighborZ,
-													 unsigned int numberOfLBnodes, 
-													 bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_M00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &distributions[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &distributions[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes];
-      D.f[DIR_MPP ] = &distributions[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes];
+      D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_P00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &distributions[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &distributions[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes];
-      D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_PPM ] = &distributions[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes];
+      D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -107,24 +132,24 @@ __global__ void QDevice3rdMomentsComp27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &subgridDistances[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &subgridDistances[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &subgridDistances[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &subgridDistances[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &subgridDistances[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &subgridDistances[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &subgridDistances[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &subgridDistances[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &subgridDistances[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &subgridDistances[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &subgridDistances[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &subgridDistances[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &subgridDistances[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &subgridDistances[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &subgridDistances[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &subgridDistances[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &subgridDistances[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &subgridDistances[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &subgridDistances[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &subgridDistances[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &subgridDistances[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &subgridDistances[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &subgridDistances[DIR_00P * numberOfBCnodes];
+      q_dirB   = &subgridDistances[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &subgridDistances[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &subgridDistances[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &subgridDistances[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &subgridDistances[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &subgridDistances[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &subgridDistances[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &subgridDistances[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &subgridDistances[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &subgridDistances[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &subgridDistances[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &subgridDistances[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &subgridDistances[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &subgridDistances[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &subgridDistances[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &subgridDistances[DIR_PMP * numberOfBCnodes];
@@ -167,32 +192,32 @@ __global__ void QDevice3rdMomentsComp27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q, m3;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -217,63 +242,63 @@ __global__ void QDevice3rdMomentsComp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_M00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &distributions[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &distributions[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes];
-         D.f[DIR_MPP ] = &distributions[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes];
+         D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_P00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &distributions[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &distributions[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes];
-         D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_PPM ] = &distributions[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes];
+         D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -559,77 +584,78 @@ __global__ void QDevice3rdMomentsComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QDeviceIncompHighNu27(real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int numberOfLBnodes, 
-												 bool isEvenTimestep)
+__global__ void QDeviceIncompHighNu27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -650,24 +676,24 @@ __global__ void QDeviceIncompHighNu27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -710,32 +736,32 @@ __global__ void QDeviceIncompHighNu27(real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -760,63 +786,63 @@ __global__ void QDeviceIncompHighNu27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -1055,77 +1081,77 @@ __global__ void QDeviceIncompHighNu27(real* DD,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QDeviceCompHighNu27(
-												 real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int numberOfBCnodes, 
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1146,24 +1172,24 @@ __global__ void QDeviceCompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1206,58 +1232,58 @@ __global__ void QDeviceCompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
-      //f_W    = (D.f[DIR_P00   ])[ke   ];
-      //f_E    = (D.f[DIR_M00   ])[kw   ];
-      //f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //f_B    = (D.f[DIR_00P   ])[kt   ];
-      //f_T    = (D.f[DIR_00M   ])[kb   ];
-      //f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
+      //f_W    = (D.f[DIR_P00])[ke   ];
+      //f_E    = (D.f[DIR_M00])[kw   ];
+      //f_S    = (D.f[DIR_0P0])[kn   ];
+      //f_N    = (D.f[DIR_0M0])[ks   ];
+      //f_B    = (D.f[DIR_00P])[kt   ];
+      //f_T    = (D.f[DIR_00M])[kb   ];
+      //f_SW   = (D.f[DIR_PP0])[kne  ];
+      //f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //f_NW   = (D.f[DIR_PM0])[kse  ];
+      //f_SE   = (D.f[DIR_MP0])[knw  ];
+      //f_BW   = (D.f[DIR_P0P])[kte  ];
+      //f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //f_BN   = (D.f[DIR_0MP])[kts  ];
+      //f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -1282,63 +1308,63 @@ __global__ void QDeviceCompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -1629,16 +1655,16 @@ __global__ void QDeviceCompHighNu27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QDeviceComp27(
-										 real* distributions, 
-										 int* subgridDistanceIndices, 
-										 real* subgridDistances,
-										 unsigned int numberOfBCnodes, 
-										 real omega, 
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int numberOfLBnodes, 
-										 bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The no-slip boundary condition is executed in the following steps
@@ -1646,16 +1672,9 @@ __global__ void QDeviceComp27(
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned k = nx*(ny*z + y) + x;
-
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -1673,7 +1692,7 @@ __global__ void QDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -1705,32 +1724,32 @@ __global__ void QDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -1761,7 +1780,7 @@ __global__ void QDeviceComp27(
        ////////////////////////////////////////////////////////////////////////////////
       //! - Update distributions with subgrid distance (q) between zero and one
       real feq, q, velocityLB;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -1769,7 +1788,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -1777,7 +1796,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -1785,7 +1804,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -1793,7 +1812,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -1801,7 +1820,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -1809,7 +1828,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -1817,7 +1836,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -1825,7 +1844,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -1833,7 +1852,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -1841,7 +1860,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -1849,7 +1868,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -1857,7 +1876,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -1865,7 +1884,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -1873,7 +1892,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -1881,7 +1900,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -1889,7 +1908,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -1897,7 +1916,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -1905,7 +1924,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -1913,7 +1932,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -1921,7 +1940,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -1929,7 +1948,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -1937,7 +1956,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -1945,7 +1964,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -1953,7 +1972,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -1961,7 +1980,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -2011,16 +2030,17 @@ __global__ void QDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QDevice27(real* distributions, 
-                                     int* subgridDistanceIndices, 
-                                     real* subgridDistances,
-                                     unsigned int numberOfBCnodes, 
-                                     real omega, 
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes, 
-                                     bool isEvenTimestep)
+__global__ void QDevice27(
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The no-slip boundary condition is executed in the following steps
@@ -2028,19 +2048,12 @@ __global__ void QDevice27(real* distributions,
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
 
       //////////////////////////////////////////////////////////////////////////
@@ -2059,7 +2072,7 @@ __global__ void QDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -2091,32 +2104,32 @@ __global__ void QDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -2148,7 +2161,7 @@ __global__ void QDevice27(real* distributions,
       //! - Update distributions with subgrid distance (q) between zero and one
       //!
       real feq, q, velocityLB;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -2156,7 +2169,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -2164,7 +2177,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -2172,7 +2185,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -2180,7 +2193,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -2188,7 +2201,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -2196,7 +2209,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -2204,7 +2217,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -2212,7 +2225,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -2220,7 +2233,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -2228,7 +2241,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -2236,7 +2249,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -2244,7 +2257,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -2252,7 +2265,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -2260,7 +2273,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -2268,7 +2281,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -2276,7 +2289,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -2284,7 +2297,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -2292,7 +2305,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -2300,7 +2313,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -2308,7 +2321,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -2316,7 +2329,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -2324,7 +2337,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -2332,7 +2345,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -2340,7 +2353,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -2348,7 +2361,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -2398,15 +2411,16 @@ __global__ void QDevice27(real* distributions,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void BBDevice27(real* distributions, 
-                                     int* subgridDistanceIndices, 
-                                     real* subgridDistances,
-                                     unsigned int numberOfBCnodes, 
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes, 
-                                     bool isEvenTimestep)
+__global__ void BBDevice27(
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The no-slip boundary condition is executed in the following steps
@@ -2414,18 +2428,11 @@ __global__ void BBDevice27(real* distributions,
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;   // global x-index
-   const unsigned  y = blockIdx.x;    // global y-index
-   const unsigned  z = blockIdx.y;    // global z-index
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    // run for all indices in size of boundary condition (numberOfBCnodes)
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -2443,7 +2450,7 @@ __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
       unsigned int kn   = indexOfBCnode;
@@ -2474,32 +2481,32 @@ __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
@@ -2509,32 +2516,32 @@ __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - rewrite distributions if there is a sub-grid distance (q) in same direction
       real q;
-      q = (subgridD.q[DIR_P00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00  ])[kw  ]=f_E  ;
-      q = (subgridD.q[DIR_M00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00  ])[ke  ]=f_W  ;
-      q = (subgridD.q[DIR_0P0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0  ])[ks  ]=f_N  ;
-      q = (subgridD.q[DIR_0M0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0  ])[kn  ]=f_S  ;
-      q = (subgridD.q[DIR_00P  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M  ])[kb  ]=f_T  ;
-      q = (subgridD.q[DIR_00M  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P  ])[kt  ]=f_B  ;
-      q = (subgridD.q[DIR_PP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0 ])[ksw ]=f_NE ;
-      q = (subgridD.q[DIR_MM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0 ])[kne ]=f_SW ;
-      q = (subgridD.q[DIR_PM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0 ])[knw ]=f_SE ;
-      q = (subgridD.q[DIR_MP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0 ])[kse ]=f_NW ;
-      q = (subgridD.q[DIR_P0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M ])[kbw ]=f_TE ;
-      q = (subgridD.q[DIR_M0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P ])[kte ]=f_BW ;
-      q = (subgridD.q[DIR_P0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P ])[ktw ]=f_BE ;
-      q = (subgridD.q[DIR_M0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M ])[kbe ]=f_TW ;
-      q = (subgridD.q[DIR_0PP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM ])[kbs ]=f_TN ;
-      q = (subgridD.q[DIR_0MM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP ])[ktn ]=f_BS ;
-      q = (subgridD.q[DIR_0PM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP ])[kts ]=f_BN ;
-      q = (subgridD.q[DIR_0MP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM ])[kbn ]=f_TS ;
-      q = (subgridD.q[DIR_PPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE;
-      q = (subgridD.q[DIR_MMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW;
-      q = (subgridD.q[DIR_PPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE;
-      q = (subgridD.q[DIR_MMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW;
-      q = (subgridD.q[DIR_PMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE;
-      q = (subgridD.q[DIR_MPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW;
-      q = (subgridD.q[DIR_PMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE;
-      q = (subgridD.q[DIR_MPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW;
+      q = (subgridD.q[DIR_P00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00])[kw  ]=f_E  ;
+      q = (subgridD.q[DIR_M00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00])[ke  ]=f_W  ;
+      q = (subgridD.q[DIR_0P0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0])[ks  ]=f_N  ;
+      q = (subgridD.q[DIR_0M0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0])[kn  ]=f_S  ;
+      q = (subgridD.q[DIR_00P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M])[kb  ]=f_T  ;
+      q = (subgridD.q[DIR_00M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P])[kt  ]=f_B  ;
+      q = (subgridD.q[DIR_PP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0])[ksw ]=f_NE ;
+      q = (subgridD.q[DIR_MM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0])[kne ]=f_SW ;
+      q = (subgridD.q[DIR_PM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0])[knw ]=f_SE ;
+      q = (subgridD.q[DIR_MP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0])[kse ]=f_NW ;
+      q = (subgridD.q[DIR_P0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M])[kbw ]=f_TE ;
+      q = (subgridD.q[DIR_M0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P])[kte ]=f_BW ;
+      q = (subgridD.q[DIR_P0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P])[ktw ]=f_BE ;
+      q = (subgridD.q[DIR_M0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M])[kbe ]=f_TW ;
+      q = (subgridD.q[DIR_0PP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM])[kbs ]=f_TN ;
+      q = (subgridD.q[DIR_0MM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP])[ktn ]=f_BS ;
+      q = (subgridD.q[DIR_0PM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP])[kts ]=f_BN ;
+      q = (subgridD.q[DIR_0MP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM])[kbn ]=f_TS ;
+      q = (subgridD.q[DIR_PPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE;
+      q = (subgridD.q[DIR_MMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW;
+      q = (subgridD.q[DIR_PPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE;
+      q = (subgridD.q[DIR_MMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW;
+      q = (subgridD.q[DIR_PMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE;
+      q = (subgridD.q[DIR_MPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW;
+      q = (subgridD.q[DIR_PMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE;
+      q = (subgridD.q[DIR_MPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
index 3a3ab784e6a7901c41d402629172c3c6154ffde9..22d9df4a3b4ae706dcf9b76d93940122015248f1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
@@ -29,7 +29,7 @@ __global__ void InitParticles( real* coordX,
 										  unsigned int* neighborWSB,
 										  int level,
 									      unsigned int numberOfParticles, 
-										  unsigned int size_Mat)
+										  unsigned long long numberOfLBnodes)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -72,12 +72,12 @@ __global__ void InitParticles( real* coordX,
 
 		////////////////////////////////////////////////////////////////////////////////
 		//find random node of the fluid domain
-		unsigned int cbID = (unsigned int)(randArray[k]*size_Mat);
-		for(int i = 0; i < size_Mat;i++)
+		unsigned int cbID = (unsigned int)(randArray[k]*numberOfLBnodes);
+		for(int i = 0; i < numberOfLBnodes;i++)
 		{
 			//if (coordX[cbID] < 15 && coordX[cbID] > 5 && coordY[cbID] < 15 && coordY[cbID] > 5 && coordZ[cbID] < 15 && coordZ[cbID] > 5)	break;
 			if (coordX[cbID] < 5 && coordX[cbID] > 2)	break;
-			cbID = (unsigned int)(randArray[k]*(size_Mat - i)); 
+			cbID = (unsigned int)(randArray[k]*(numberOfLBnodes - i)); 
 		}
 	   
 		real coordinateX;
@@ -183,7 +183,7 @@ __global__ void MoveParticles( real* coordX,
 										  unsigned int timestep, 
 										  unsigned int numberOfTimesteps, 
 									      unsigned int numberOfParticles, 
-										  unsigned int size_Mat,
+										  unsigned long long numberOfLBnodes,
 										  bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -248,63 +248,63 @@ __global__ void MoveParticles( real* coordX,
 		{
 		   if (isEvenTimestep==true)
 		   {
-			  feC    = &DD[DIR_P00   *size_Mat];
-			  fwC    = &DD[DIR_M00   *size_Mat];
-			  fnC    = &DD[DIR_0P0   *size_Mat];
-			  fsC    = &DD[DIR_0M0   *size_Mat];
-			  ftC    = &DD[DIR_00P   *size_Mat];
-			  fbC    = &DD[DIR_00M   *size_Mat];
-			  fneC   = &DD[DIR_PP0  *size_Mat];
-			  fswC   = &DD[DIR_MM0  *size_Mat];
-			  fseC   = &DD[DIR_PM0  *size_Mat];
-			  fnwC   = &DD[DIR_MP0  *size_Mat];
-			  fteC   = &DD[DIR_P0P  *size_Mat];
-			  fbwC   = &DD[DIR_M0M  *size_Mat];
-			  fbeC   = &DD[DIR_P0M  *size_Mat];
-			  ftwC   = &DD[DIR_M0P  *size_Mat];
-			  ftnC   = &DD[DIR_0PP  *size_Mat];
-			  fbsC   = &DD[DIR_0MM  *size_Mat];
-			  fbnC   = &DD[DIR_0PM  *size_Mat];
-			  ftsC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  ftneC  = &DD[DIR_PPP *size_Mat];
-			  ftswC  = &DD[DIR_MMP *size_Mat];
-			  ftseC  = &DD[DIR_PMP *size_Mat];
-			  ftnwC  = &DD[DIR_MPP *size_Mat];
-			  fbneC  = &DD[DIR_PPM *size_Mat];
-			  fbswC  = &DD[DIR_MMM *size_Mat];
-			  fbseC  = &DD[DIR_PMM *size_Mat];
-			  fbnwC  = &DD[DIR_MPM *size_Mat];
+			  feC    = &DD[DIR_P00 * numberOfLBnodes];
+			  fwC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  ftC    = &DD[DIR_00P * numberOfLBnodes];
+			  fbC    = &DD[DIR_00M * numberOfLBnodes];
+			  fneC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fswC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fteC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fbwC   = &DD[DIR_M0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_P0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_M0P * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0PP * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0MM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0PM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  ftneC  = &DD[DIR_PPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_MMP * numberOfLBnodes];
+			  ftseC  = &DD[DIR_PMP * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_MPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_PPM * numberOfLBnodes];
+			  fbswC  = &DD[DIR_MMM * numberOfLBnodes];
+			  fbseC  = &DD[DIR_PMM * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_MPM * numberOfLBnodes];
 		   } 			 
 		   else			 
 		   {			 
-			  fwC    = &DD[DIR_P00   *size_Mat];
-			  feC    = &DD[DIR_M00   *size_Mat];
-			  fsC    = &DD[DIR_0P0   *size_Mat];
-			  fnC    = &DD[DIR_0M0   *size_Mat];
-			  fbC    = &DD[DIR_00P   *size_Mat];
-			  ftC    = &DD[DIR_00M   *size_Mat];
-			  fswC   = &DD[DIR_PP0  *size_Mat];
-			  fneC   = &DD[DIR_MM0  *size_Mat];
-			  fnwC   = &DD[DIR_PM0  *size_Mat];
-			  fseC   = &DD[DIR_MP0  *size_Mat];
-			  fbwC   = &DD[DIR_P0P  *size_Mat];
-			  fteC   = &DD[DIR_M0M  *size_Mat];
-			  ftwC   = &DD[DIR_P0M  *size_Mat];
-			  fbeC   = &DD[DIR_M0P  *size_Mat];
-			  fbsC   = &DD[DIR_0PP  *size_Mat];
-			  ftnC   = &DD[DIR_0MM  *size_Mat];
-			  ftsC   = &DD[DIR_0PM  *size_Mat];
-			  fbnC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  fbswC  = &DD[DIR_PPP *size_Mat];
-			  fbneC  = &DD[DIR_MMP *size_Mat];
-			  fbnwC  = &DD[DIR_PMP *size_Mat];
-			  fbseC  = &DD[DIR_MPP *size_Mat];
-			  ftswC  = &DD[DIR_PPM *size_Mat];
-			  ftneC  = &DD[DIR_MMM *size_Mat];
-			  ftnwC  = &DD[DIR_PMM *size_Mat];
-			  ftseC  = &DD[DIR_MPM *size_Mat];
+			  fwC    = &DD[DIR_P00 * numberOfLBnodes];
+			  feC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  fbC    = &DD[DIR_00P * numberOfLBnodes];
+			  ftC    = &DD[DIR_00M * numberOfLBnodes];
+			  fswC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fneC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fbwC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fteC   = &DD[DIR_M0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_P0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_M0P * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0PP * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0MM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0PM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  fbswC  = &DD[DIR_PPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_MMP * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_PMP * numberOfLBnodes];
+			  fbseC  = &DD[DIR_MPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_PPM * numberOfLBnodes];
+			  ftneC  = &DD[DIR_MMM * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_PMM * numberOfLBnodes];
+			  ftseC  = &DD[DIR_MPM * numberOfLBnodes];
 		   }
 
 			  //////////////////////////////////////////////////////////////////////////
@@ -1055,7 +1055,7 @@ __global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  unsigned int timestep, 
 													  unsigned int numberOfTimesteps, 
 													  unsigned int numberOfParticles, 
-													  unsigned int size_Mat,
+													  unsigned long long numberOfLBnodes,
 													  bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -1114,63 +1114,63 @@ __global__ void MoveParticlesWithoutBCs(   real* coordX,
 		{
 		   if (isEvenTimestep==true)
 		   {
-			  feC    = &DD[DIR_P00   *size_Mat];
-			  fwC    = &DD[DIR_M00   *size_Mat];
-			  fnC    = &DD[DIR_0P0   *size_Mat];
-			  fsC    = &DD[DIR_0M0   *size_Mat];
-			  ftC    = &DD[DIR_00P   *size_Mat];
-			  fbC    = &DD[DIR_00M   *size_Mat];
-			  fneC   = &DD[DIR_PP0  *size_Mat];
-			  fswC   = &DD[DIR_MM0  *size_Mat];
-			  fseC   = &DD[DIR_PM0  *size_Mat];
-			  fnwC   = &DD[DIR_MP0  *size_Mat];
-			  fteC   = &DD[DIR_P0P  *size_Mat];
-			  fbwC   = &DD[DIR_M0M  *size_Mat];
-			  fbeC   = &DD[DIR_P0M  *size_Mat];
-			  ftwC   = &DD[DIR_M0P  *size_Mat];
-			  ftnC   = &DD[DIR_0PP  *size_Mat];
-			  fbsC   = &DD[DIR_0MM  *size_Mat];
-			  fbnC   = &DD[DIR_0PM  *size_Mat];
-			  ftsC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  ftneC  = &DD[DIR_PPP *size_Mat];
-			  ftswC  = &DD[DIR_MMP *size_Mat];
-			  ftseC  = &DD[DIR_PMP *size_Mat];
-			  ftnwC  = &DD[DIR_MPP *size_Mat];
-			  fbneC  = &DD[DIR_PPM *size_Mat];
-			  fbswC  = &DD[DIR_MMM *size_Mat];
-			  fbseC  = &DD[DIR_PMM *size_Mat];
-			  fbnwC  = &DD[DIR_MPM *size_Mat];
+			  feC    = &DD[DIR_P00 * numberOfLBnodes];
+			  fwC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  ftC    = &DD[DIR_00P * numberOfLBnodes];
+			  fbC    = &DD[DIR_00M * numberOfLBnodes];
+			  fneC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fswC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fteC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fbwC   = &DD[DIR_M0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_P0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_M0P * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0PP * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0MM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0PM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  ftneC  = &DD[DIR_PPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_MMP * numberOfLBnodes];
+			  ftseC  = &DD[DIR_PMP * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_MPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_PPM * numberOfLBnodes];
+			  fbswC  = &DD[DIR_MMM * numberOfLBnodes];
+			  fbseC  = &DD[DIR_PMM * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_MPM * numberOfLBnodes];
 		   } 			 
 		   else			 
 		   {			 
-			  fwC    = &DD[DIR_P00   *size_Mat];
-			  feC    = &DD[DIR_M00   *size_Mat];
-			  fsC    = &DD[DIR_0P0   *size_Mat];
-			  fnC    = &DD[DIR_0M0   *size_Mat];
-			  fbC    = &DD[DIR_00P   *size_Mat];
-			  ftC    = &DD[DIR_00M   *size_Mat];
-			  fswC   = &DD[DIR_PP0  *size_Mat];
-			  fneC   = &DD[DIR_MM0  *size_Mat];
-			  fnwC   = &DD[DIR_PM0  *size_Mat];
-			  fseC   = &DD[DIR_MP0  *size_Mat];
-			  fbwC   = &DD[DIR_P0P  *size_Mat];
-			  fteC   = &DD[DIR_M0M  *size_Mat];
-			  ftwC   = &DD[DIR_P0M  *size_Mat];
-			  fbeC   = &DD[DIR_M0P  *size_Mat];
-			  fbsC   = &DD[DIR_0PP  *size_Mat];
-			  ftnC   = &DD[DIR_0MM  *size_Mat];
-			  ftsC   = &DD[DIR_0PM  *size_Mat];
-			  fbnC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  fbswC  = &DD[DIR_PPP *size_Mat];
-			  fbneC  = &DD[DIR_MMP *size_Mat];
-			  fbnwC  = &DD[DIR_PMP *size_Mat];
-			  fbseC  = &DD[DIR_MPP *size_Mat];
-			  ftswC  = &DD[DIR_PPM *size_Mat];
-			  ftneC  = &DD[DIR_MMM *size_Mat];
-			  ftnwC  = &DD[DIR_PMM *size_Mat];
-			  ftseC  = &DD[DIR_MPM *size_Mat];
+			  fwC    = &DD[DIR_P00 * numberOfLBnodes];
+			  feC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  fbC    = &DD[DIR_00P * numberOfLBnodes];
+			  ftC    = &DD[DIR_00M * numberOfLBnodes];
+			  fswC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fneC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fbwC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fteC   = &DD[DIR_M0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_P0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_M0P * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0PP * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0MM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0PM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  fbswC  = &DD[DIR_PPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_MMP * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_PMP * numberOfLBnodes];
+			  fbseC  = &DD[DIR_MPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_PPM * numberOfLBnodes];
+			  ftneC  = &DD[DIR_MMM * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_PMM * numberOfLBnodes];
+			  ftseC  = &DD[DIR_MPM * numberOfLBnodes];
 		   }
 
 			  //////////////////////////////////////////////////////////////////////////
@@ -1928,7 +1928,7 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
 													  real* NormalX,
 													  real* NormalY,
 													  real* NormalZ,
-													  unsigned int size_Mat, 
+													  unsigned long long numberOfLBnodes, 
 													  bool isEvenTimestep)
 {
 
@@ -1937,63 +1937,63 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
    //}
    //////////////////////////////////////////////////////////////////////////////////
    //const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2015,24 +2015,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
    // //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
    // //         *q_dirBSE, *q_dirBNW; 
-   // //   q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-   //    q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-   // //   q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-   //    q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-   // //   q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-   //    q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-   // //   q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-   // //   q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-   // //   q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-   // //   q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-   // //   q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-   // //   q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-   // //   q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-   // //   q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-   // //   q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-   // //   q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-   // //   q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-   // //   q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+   // //   q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+   //    q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+   // //   q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+   //    q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+   // //   q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+   //    q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+   // //   q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+   // //   q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+   // //   q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+   // //   q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+   // //   q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+   // //   q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+   // //   q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+   // //   q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+   // //   q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+   // //   q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+   // //   q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+   // //   q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
    // //   q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
    // //   q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
    // //   q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2047,24 +2047,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //           *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
    // //           *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
    // //           *nx_dirBSE, *nx_dirBNW; 
-   // //   nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
-   // //   nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
-   // //   nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
-   // //   nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
-   // //   nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
-   // //   nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
-   // //   nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
-   // //   nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
-   // //   nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
-   // //   nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
-   // //   nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
-   // //   nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
-   // //   nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
-   // //   nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
-   // //   nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
-   // //   nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
-   // //   nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
-   // //   nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+   // //   nx_dirE   = &NormalX[DIR_P00 * numberOfBCnodes];
+   // //   nx_dirW   = &NormalX[DIR_M00 * numberOfBCnodes];
+   // //   nx_dirN   = &NormalX[DIR_0P0 * numberOfBCnodes];
+   // //   nx_dirS   = &NormalX[DIR_0M0 * numberOfBCnodes];
+   // //   nx_dirT   = &NormalX[DIR_00P * numberOfBCnodes];
+   // //   nx_dirB   = &NormalX[DIR_00M * numberOfBCnodes];
+   // //   nx_dirNE  = &NormalX[DIR_PP0 * numberOfBCnodes];
+   // //   nx_dirSW  = &NormalX[DIR_MM0 * numberOfBCnodes];
+   // //   nx_dirSE  = &NormalX[DIR_PM0 * numberOfBCnodes];
+   // //   nx_dirNW  = &NormalX[DIR_MP0 * numberOfBCnodes];
+   // //   nx_dirTE  = &NormalX[DIR_P0P * numberOfBCnodes];
+   // //   nx_dirBW  = &NormalX[DIR_M0M * numberOfBCnodes];
+   // //   nx_dirBE  = &NormalX[DIR_P0M * numberOfBCnodes];
+   // //   nx_dirTW  = &NormalX[DIR_M0P * numberOfBCnodes];
+   // //   nx_dirTN  = &NormalX[DIR_0PP * numberOfBCnodes];
+   // //   nx_dirBS  = &NormalX[DIR_0MM * numberOfBCnodes];
+   // //   nx_dirBN  = &NormalX[DIR_0PM * numberOfBCnodes];
+   // //   nx_dirTS  = &NormalX[DIR_0MP * numberOfBCnodes];
    // //   nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
    // //   nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
    // //   nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
@@ -2079,24 +2079,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //           *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
    // //           *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
    // //           *ny_dirBSE, *ny_dirBNW; 
-   // //   ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
-   // //   ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
-   // //   ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
-   // //   ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
-   // //   ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
-   // //   ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
-   // //   ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
-   // //   ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
-   // //   ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
-   // //   ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
-   // //   ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
-   // //   ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
-   // //   ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
-   // //   ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
-   // //   ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
-   // //   ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
-   // //   ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
-   // //   ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+   // //   ny_dirE   = &NormalY[DIR_P00 * numberOfBCnodes];
+   // //   ny_dirW   = &NormalY[DIR_M00 * numberOfBCnodes];
+   // //   ny_dirN   = &NormalY[DIR_0P0 * numberOfBCnodes];
+   // //   ny_dirS   = &NormalY[DIR_0M0 * numberOfBCnodes];
+   // //   ny_dirT   = &NormalY[DIR_00P * numberOfBCnodes];
+   // //   ny_dirB   = &NormalY[DIR_00M * numberOfBCnodes];
+   // //   ny_dirNE  = &NormalY[DIR_PP0 * numberOfBCnodes];
+   // //   ny_dirSW  = &NormalY[DIR_MM0 * numberOfBCnodes];
+   // //   ny_dirSE  = &NormalY[DIR_PM0 * numberOfBCnodes];
+   // //   ny_dirNW  = &NormalY[DIR_MP0 * numberOfBCnodes];
+   // //   ny_dirTE  = &NormalY[DIR_P0P * numberOfBCnodes];
+   // //   ny_dirBW  = &NormalY[DIR_M0M * numberOfBCnodes];
+   // //   ny_dirBE  = &NormalY[DIR_P0M * numberOfBCnodes];
+   // //   ny_dirTW  = &NormalY[DIR_M0P * numberOfBCnodes];
+   // //   ny_dirTN  = &NormalY[DIR_0PP * numberOfBCnodes];
+   // //   ny_dirBS  = &NormalY[DIR_0MM * numberOfBCnodes];
+   // //   ny_dirBN  = &NormalY[DIR_0PM * numberOfBCnodes];
+   // //   ny_dirTS  = &NormalY[DIR_0MP * numberOfBCnodes];
    // //   ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
    // //   ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
    // //   ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
@@ -2111,24 +2111,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //           *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
    // //           *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
    // //           *nz_dirBSE, *nz_dirBNW; 
-   // //   nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
-   // //   nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
-   // //   nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
-   // //   nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
-   // //   nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
-   // //   nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
-   // //   nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
-   // //   nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
-   // //   nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
-   // //   nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
-   // //   nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
-   // //   nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
-   // //   nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
-   // //   nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
-   // //   nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
-   // //   nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
-   // //   nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
-   // //   nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+   // //   nz_dirE   = &NormalZ[DIR_P00 * numberOfBCnodes];
+   // //   nz_dirW   = &NormalZ[DIR_M00 * numberOfBCnodes];
+   // //   nz_dirN   = &NormalZ[DIR_0P0 * numberOfBCnodes];
+   // //   nz_dirS   = &NormalZ[DIR_0M0 * numberOfBCnodes];
+   // //   nz_dirT   = &NormalZ[DIR_00P * numberOfBCnodes];
+   // //   nz_dirB   = &NormalZ[DIR_00M * numberOfBCnodes];
+   // //   nz_dirNE  = &NormalZ[DIR_PP0 * numberOfBCnodes];
+   // //   nz_dirSW  = &NormalZ[DIR_MM0 * numberOfBCnodes];
+   // //   nz_dirSE  = &NormalZ[DIR_PM0 * numberOfBCnodes];
+   // //   nz_dirNW  = &NormalZ[DIR_MP0 * numberOfBCnodes];
+   // //   nz_dirTE  = &NormalZ[DIR_P0P * numberOfBCnodes];
+   // //   nz_dirBW  = &NormalZ[DIR_M0M * numberOfBCnodes];
+   // //   nz_dirBE  = &NormalZ[DIR_P0M * numberOfBCnodes];
+   // //   nz_dirTW  = &NormalZ[DIR_M0P * numberOfBCnodes];
+   // //   nz_dirTN  = &NormalZ[DIR_0PP * numberOfBCnodes];
+   // //   nz_dirBS  = &NormalZ[DIR_0MM * numberOfBCnodes];
+   // //   nz_dirBN  = &NormalZ[DIR_0PM * numberOfBCnodes];
+   // //   nz_dirTS  = &NormalZ[DIR_0MP * numberOfBCnodes];
    // //   nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
    // //   nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
    // //   nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
@@ -2190,32 +2190,32 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //   unsigned int ktne = KQK;
    //   unsigned int kbsw = neighborZ[ksw];
    //   ////////////////////////////////////////////////////////////////////////////////
-   //   real f_W    = (D.f[DIR_P00   ])[ke   ];
-   //   real f_E    = (D.f[DIR_M00   ])[kw   ];
-   //   real f_S    = (D.f[DIR_0P0   ])[kn   ];
-   //   real f_N    = (D.f[DIR_0M0   ])[ks   ];
-   //   real f_B    = (D.f[DIR_00P   ])[kt   ];
-   //   real f_T    = (D.f[DIR_00M   ])[kb   ];
-   //   real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-   //   real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-   //   real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-   //   real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-   //   real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-   //   real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-   //   real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-   //   real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-   //   real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-   //   real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-   //   real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-   //   real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-   //   real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-   //   real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-   //   real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-   //   real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-   //   real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-   //   real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-   //   real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-   //   real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+   //   real f_W    = (D.f[DIR_P00])[ke   ];
+   //   real f_E    = (D.f[DIR_M00])[kw   ];
+   //   real f_S    = (D.f[DIR_0P0])[kn   ];
+   //   real f_N    = (D.f[DIR_0M0])[ks   ];
+   //   real f_B    = (D.f[DIR_00P])[kt   ];
+   //   real f_T    = (D.f[DIR_00M])[kb   ];
+   //   real f_SW   = (D.f[DIR_PP0])[kne  ];
+   //   real f_NE   = (D.f[DIR_MM0])[ksw  ];
+   //   real f_NW   = (D.f[DIR_PM0])[kse  ];
+   //   real f_SE   = (D.f[DIR_MP0])[knw  ];
+   //   real f_BW   = (D.f[DIR_P0P])[kte  ];
+   //   real f_TE   = (D.f[DIR_M0M])[kbw  ];
+   //   real f_TW   = (D.f[DIR_P0M])[kbe  ];
+   //   real f_BE   = (D.f[DIR_M0P])[ktw  ];
+   //   real f_BS   = (D.f[DIR_0PP])[ktn  ];
+   //   real f_TN   = (D.f[DIR_0MM])[kbs  ];
+   //   real f_TS   = (D.f[DIR_0PM])[kbn  ];
+   //   real f_BN   = (D.f[DIR_0MP])[kts  ];
+   //   real f_BSW  = (D.f[DIR_PPP])[ktne ];
+   //   real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+   //   real f_BNW  = (D.f[DIR_PMP])[ktse ];
+   //   real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+   //   real f_TSW  = (D.f[DIR_PPM])[kbne ];
+   //   real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+   //   real f_TNW  = (D.f[DIR_PMM])[kbse ];
+   //   real f_TSE  = (D.f[DIR_MPM])[kbnw ];
    //   ////////////////////////////////////////////////////////////////////////////////
    //   // real feq, q;
    //   real vx1, vx2, vx3, drho;
@@ -2241,63 +2241,63 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //   //////////////////////////////////////////////////////////////////////////
    //   if (isEvenTimestep==false)
    //   {
-   //      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //      D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+   //      D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+   //      D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+   //      D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+   //      D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+   //      D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+   //      D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+   //      D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+   //      D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+   //      D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+   //      D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+   //      D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+   //      D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+   //      D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+   //      D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+   //      D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+   //      D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+   //      D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //      D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+   //      D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+   //      D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+   //      D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+   //      D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+   //      D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+   //      D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+   //      D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
    //   } 
    //   else
    //   {
-   //      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //      D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+   //      D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+   //      D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+   //      D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+   //      D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+   //      D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+   //      D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+   //      D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+   //      D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+   //      D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+   //      D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+   //      D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+   //      D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+   //      D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+   //      D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+   //      D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+   //      D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+   //      D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //      D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+   //      D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+   //      D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+   //      D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+   //      D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+   //      D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+   //      D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+   //      D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
    //   }
    //}
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
new file mode 100644
index 0000000000000000000000000000000000000000..177eb41587896dd7993b06f98a1506abfc4f3f5f
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
@@ -0,0 +1,1157 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PrecursorBCs27.cu
+//! \ingroup GPU
+//! \author Henry Korb, Henrik Asmuth
+//======================================================================================
+#include "LBM/LB.h"
+#include <lbm/constants/NumericConstants.h>
+#include <lbm/constants/D3Q27.h>
+#include <lbm/MacroscopicQuantities.h>
+
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+
+using namespace vf::lbm::constant;
+using namespace vf::lbm::dir;
+using namespace vf::gpu;
+
+__global__ void QPrecursorDeviceCompZeroPress(
+    int* subgridDistanceIndices,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    int sizeQ,
+    real omega,
+    real* distributions,
+    real* subgridDistances,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* vLast,
+    real* vCurrent,
+    real velocityX,
+    real velocityY,
+    real velocityZ,
+    real timeRatio,
+    real velocityRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    if(nodeIndex>=numberOfBCnodes) return;
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // interpolation of velocity
+    real vxLastInterpd, vyLastInterpd, vzLastInterpd;
+    real vxNextInterpd, vyNextInterpd, vzNextInterpd;
+
+    uint kNeighbor0PP = neighbors0PP[nodeIndex];
+    real d0PP = weights0PP[nodeIndex];
+
+    real* vxLast = vLast;
+    real* vyLast = &vLast[numberOfPrecursorNodes];
+    real* vzLast = &vLast[2*numberOfPrecursorNodes];
+
+    real* vxCurrent = vCurrent;
+    real* vyCurrent = &vCurrent[numberOfPrecursorNodes];
+    real* vzCurrent = &vCurrent[2*numberOfPrecursorNodes];
+
+    if(d0PP < 1e6)
+    {
+        uint kNeighbor0PM = neighbors0PM[nodeIndex];
+        uint kNeighbor0MP = neighbors0MP[nodeIndex];
+        uint kNeighbor0MM = neighbors0MM[nodeIndex];
+
+        real d0PM = weights0PM[nodeIndex];
+        real d0MP = weights0MP[nodeIndex];
+        real d0MM = weights0MM[nodeIndex];
+
+        real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM);
+
+        vxLastInterpd = (vxLast[kNeighbor0PP]*d0PP + vxLast[kNeighbor0PM]*d0PM + vxLast[kNeighbor0MP]*d0MP + vxLast[kNeighbor0MM]*d0MM)*invWeightSum;
+        vyLastInterpd = (vyLast[kNeighbor0PP]*d0PP + vyLast[kNeighbor0PM]*d0PM + vyLast[kNeighbor0MP]*d0MP + vyLast[kNeighbor0MM]*d0MM)*invWeightSum;
+        vzLastInterpd = (vzLast[kNeighbor0PP]*d0PP + vzLast[kNeighbor0PM]*d0PM + vzLast[kNeighbor0MP]*d0MP + vzLast[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        vxNextInterpd = (vxCurrent[kNeighbor0PP]*d0PP + vxCurrent[kNeighbor0PM]*d0PM + vxCurrent[kNeighbor0MP]*d0MP + vxCurrent[kNeighbor0MM]*d0MM)*invWeightSum;
+        vyNextInterpd = (vyCurrent[kNeighbor0PP]*d0PP + vyCurrent[kNeighbor0PM]*d0PM + vyCurrent[kNeighbor0MP]*d0MP + vyCurrent[kNeighbor0MM]*d0MM)*invWeightSum;
+        vzNextInterpd = (vzCurrent[kNeighbor0PP]*d0PP + vzCurrent[kNeighbor0PM]*d0PM + vzCurrent[kNeighbor0MP]*d0MP + vzCurrent[kNeighbor0MM]*d0MM)*invWeightSum;
+    }
+    else
+    {
+        vxLastInterpd = vxLast[kNeighbor0PP];
+        vyLastInterpd = vyLast[kNeighbor0PP];
+        vzLastInterpd = vzLast[kNeighbor0PP];
+
+        vxNextInterpd = vxCurrent[kNeighbor0PP];
+        vyNextInterpd = vyCurrent[kNeighbor0PP];
+        vzNextInterpd = vzCurrent[kNeighbor0PP];
+    }
+
+    // if(nodeIndex==16300) printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd);
+    real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio;
+    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio;
+    real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio;
+    // From here on just a copy of QVelDeviceCompZeroPress
+    ////////////////////////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep
+    //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
+    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+    unsigned int KQK  = subgridDistanceIndices[nodeIndex];
+    unsigned int k000= KQK;
+    unsigned int kP00   = KQK;
+    unsigned int kM00   = neighborX[KQK];
+    unsigned int k0P0   = KQK;
+    unsigned int k0M0   = neighborY[KQK];
+    unsigned int k00P   = KQK;
+    unsigned int k00M   = neighborZ[KQK];
+    unsigned int kMM0  = neighborY[kM00];
+    unsigned int kPP0  = KQK;
+    unsigned int kPM0  = k0M0;
+    unsigned int kMP0  = kM00;
+    unsigned int kM0M  = neighborZ[kM00];
+    unsigned int kP0P  = KQK;
+    unsigned int kP0M  = k00M;
+    unsigned int kM0P  = kM00;
+    unsigned int k0PP  = KQK;
+    unsigned int k0MM  = neighborZ[k0M0];
+    unsigned int k0PM  = k00M;
+    unsigned int k0MP  = k0M0;
+    unsigned int kPMP = k0M0;
+    unsigned int kMPM = kM0M;
+    unsigned int kMPP = kM00;
+    unsigned int kPMM = k0MM;
+    unsigned int kMMP = kMM0;
+    unsigned int kPPM = k00M;
+    unsigned int kPPP = KQK;
+    unsigned int kMMM = neighborZ[kMM0];
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Set local distributions
+    //!
+    real f_M00 = (dist.f[DIR_P00])[kP00];
+    real f_P00 = (dist.f[DIR_M00])[kM00];
+    real f_0M0 = (dist.f[DIR_0P0])[k0P0];
+    real f_0P0 = (dist.f[DIR_0M0])[k0M0];
+    real f_00M = (dist.f[DIR_00P])[k00P];
+    real f_00P = (dist.f[DIR_00M])[k00M];
+    real f_MM0 = (dist.f[DIR_PP0])[kPP0];
+    real f_PP0 = (dist.f[DIR_MM0])[kMM0];
+    real f_MP0 = (dist.f[DIR_PM0])[kPM0];
+    real f_PM0 = (dist.f[DIR_MP0])[kMP0];
+    real f_M0M = (dist.f[DIR_P0P])[kP0P];
+    real f_P0P = (dist.f[DIR_M0M])[kM0M];
+    real f_M0P = (dist.f[DIR_P0M])[kP0M];
+    real f_P0M = (dist.f[DIR_M0P])[kM0P];
+    real f_0MM = (dist.f[DIR_0PP])[k0PP];
+    real f_0PP = (dist.f[DIR_0MM])[k0MM];
+    real f_0MP = (dist.f[DIR_0PM])[k0PM];
+    real f_0PM = (dist.f[DIR_0MP])[k0MP];
+    real f_MMM = (dist.f[DIR_PPP])[kPPP];
+    real f_PPM = (dist.f[DIR_MMP])[kMMP];
+    real f_MPM = (dist.f[DIR_PMP])[kPMP];
+    real f_PMM = (dist.f[DIR_MPP])[kMPP];
+    real f_MMP = (dist.f[DIR_PPM])[kPPM];
+    real f_PPP = (dist.f[DIR_MMM])[kMMM];
+    real f_MPP = (dist.f[DIR_PMM])[kPMM];
+    real f_PMP = (dist.f[DIR_MPM])[kMPM];
+
+    SubgridDistances27 subgridD;
+    getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
+
+    ////////////////////////////////////////////////////////////////////////////////
+      real drho   =  f_PMP + f_MPP + f_PPP + f_MMP + f_PMM + f_MPM + f_PPM + f_MMM +
+                     f_0PM + f_0PP + f_0MP + f_0MM + f_P0M + f_M0P + f_P0P + f_M0M + f_PM0 + f_MP0 + f_PP0 + f_MM0 +
+                     f_00P + f_00M + f_0P0 + f_0M0 + f_P00 + f_M00 + ((dist.f[DIR_000])[k000]);
+
+      real vx1 =  (((f_PMP - f_MPM) - (f_MPP - f_PMM)) + ((f_PPP - f_MMM) - (f_MMP - f_PPM)) +
+                      ((f_P0M - f_M0P)   + (f_P0P - f_M0M))   + ((f_PM0 - f_MP0)   + (f_PP0 - f_MM0)) +
+                      (f_P00 - f_M00)) / (c1o1 + drho);
+
+
+      real vx2 =   ((-(f_PMP - f_MPM) + (f_MPP - f_PMM)) + ((f_PPP - f_MMM) - (f_MMP - f_PPM)) +
+                       ((f_0PM - f_0MP)   + (f_0PP - f_0MM))    + (-(f_PM0 - f_MP0)  + (f_PP0 - f_MM0)) +
+                       (f_0P0 - f_0M0)) / (c1o1 + drho);
+
+      real vx3 =   (((f_PMP - f_MPM) + (f_MPP - f_PMM)) + ((f_PPP - f_MMM) + (f_MMP - f_PPM)) +
+                       (-(f_0PM - f_0MP)  + (f_0PP - f_0MM))   + ((f_P0P - f_M0M)   - (f_P0M - f_M0P)) +
+                       (f_00P - f_00M)) / (c1o1 + drho);
+
+
+    // if(k==16383 || k==0) printf("k %d kQ %d drho = %f u %f v %f w %f\n",k, KQK, drho, vx1, vx2, vx3);
+      real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);
+    //////////////////////////////////////////////////////////////////////////
+
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Update distributions with subgrid distance (q) between zero and one
+    real feq, q, velocityLB, velocityBC;
+    q = (subgridD.q[DIR_P00])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
+    {
+        velocityLB = vx1;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+        velocityBC = VeloX;
+        (dist.f[DIR_M00])[kM00] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P00, f_M00, feq, omega, drho, velocityBC, c2o27);
+    }
+
+    q = (subgridD.q[DIR_M00])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+        velocityBC = -VeloX;
+        (dist.f[DIR_P00])[kP00] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M00, f_P00, feq, omega, drho, velocityBC, c2o27);
+    }
+
+    q = (subgridD.q[DIR_0P0])[nodeIndex]; // subgrid distance stored for direction DIR_0P0 at this BC node
+    if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
+    {
+        velocityLB = vx2;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+        velocityBC = VeloY;
+        (dist.f[DIR_0M0])[k0M0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0P0, f_0M0, feq, omega, drho, velocityBC, c2o27); // index with node k0M0 (where f_0P0 was read), not the direction constant DIR_0M0
+    }
+
+    q = (subgridD.q[DIR_0M0])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx2;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+        velocityBC = -VeloY;
+        (dist.f[DIR_0P0])[k0P0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0M0, f_0P0, feq, omega, drho, velocityBC, c2o27);
+    }
+
+    q = (subgridD.q[DIR_00P])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+        velocityBC = VeloZ;
+        (dist.f[DIR_00M])[k00M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_00P, f_00M, feq, omega, drho, velocityBC, c2o27);
+    }
+
+    q = (subgridD.q[DIR_00M])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+        velocityBC = -VeloZ;
+        (dist.f[DIR_00P])[k00P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_00M, f_00P, feq, omega, drho, velocityBC, c2o27);
+    }
+
+    q = (subgridD.q[DIR_PP0])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 + vx2;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = VeloX + VeloY;
+        (dist.f[DIR_MM0])[kMM0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PP0, f_MM0, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_MM0])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 - vx2;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX - VeloY;
+        (dist.f[DIR_PP0])[kPP0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MM0, f_PP0, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_PM0])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 - vx2;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = VeloX - VeloY;
+        (dist.f[DIR_MP0])[kMP0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PM0, f_MP0, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_MP0])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 + vx2;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX + VeloY;
+        (dist.f[DIR_PM0])[kPM0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MP0, f_PM0, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_P0P])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = VeloX + VeloZ;
+        (dist.f[DIR_M0M])[kM0M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P0P, f_M0M, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_M0M])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX - VeloZ;
+        (dist.f[DIR_P0P])[kP0P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M0M, f_P0P, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_P0M])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = VeloX - VeloZ;
+        (dist.f[DIR_M0P])[kM0P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P0M, f_M0P, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_M0P])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX + VeloZ;
+        (dist.f[DIR_P0M])[kP0M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M0P, f_P0M, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_0PP])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx2 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = VeloY + VeloZ;
+        (dist.f[DIR_0MM])[k0MM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PP, f_0MM, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_0MM])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx2 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloY - VeloZ;
+        (dist.f[DIR_0PP])[k0PP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0MM, f_0PP, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_0PM])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx2 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = VeloY - VeloZ;
+        (dist.f[DIR_0MP])[k0MP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PM, f_0PP, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_0MP])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx2 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloY + VeloZ;
+        (dist.f[DIR_0PM])[k0PM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PP, f_0PM, feq, omega, drho, velocityBC, c1o54);
+    }
+
+    q = (subgridD.q[DIR_PPP])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 + vx2 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = VeloX + VeloY + VeloZ;
+        (dist.f[DIR_MMM])[kMMM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PPP, f_MMM, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_MMM])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 - vx2 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = -VeloX - VeloY - VeloZ;
+        (dist.f[DIR_PPP])[kPPP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MMM, f_PPP, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_PPM])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 + vx2 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = VeloX + VeloY - VeloZ;
+        (dist.f[DIR_MMP])[kMMP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PPM, f_MMP, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_MMP])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 - vx2 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = -VeloX - VeloY + VeloZ;
+        (dist.f[DIR_PPM])[kPPM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MMP, f_PPM, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_PMP])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 - vx2 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = VeloX - VeloY + VeloZ;
+        (dist.f[DIR_MPM])[kMPM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PMP, f_MPM, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_MPM])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 + vx2 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = -VeloX + VeloY - VeloZ;
+        (dist.f[DIR_PMP])[kPMP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MPM, f_PMP, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_PMM])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = vx1 - vx2 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = VeloX - VeloY - VeloZ;
+        (dist.f[DIR_MPP])[kMPP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PMM, f_MPP, feq, omega, drho, velocityBC, c1o216);
+    }
+
+    q = (subgridD.q[DIR_MPP])[nodeIndex];
+    if (q>=c0o1 && q<=c1o1)
+    {
+        velocityLB = -vx1 + vx2 + vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+        velocityBC = -VeloX + VeloY + VeloZ;
+        (dist.f[DIR_PMM])[kPMM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MPP, f_PMM, feq, omega, drho, velocityBC, c1o216);
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//! \brief Precursor boundary condition that replaces all 27 distributions of a boundary node by the equilibrium
+//! of a prescribed velocity. The velocity is interpolated in space and time from two precursor snapshots.
+//!
+//! \param subgridDistanceIndices lattice node index for every boundary node
+//! \param numberOfBCnodes number of boundary nodes; threads with a larger index return immediately
+//! \param numberOfPrecursorNodes stride between the x, y and z component blocks inside vLast and vCurrent
+//! \param omega not used by this kernel (kept so that the signature stays unchanged)
+//! \param distributions distribution array handed to getPointersToDistributions
+//! \param neighborX, neighborY, neighborZ neighbor index lists used to derive the storage indices
+//! \param neighbors0PP, neighbors0PM, neighbors0MP, neighbors0MM per boundary node: indices into the precursor data
+//! \param weights0PP, weights0PM, weights0MP, weights0MM per boundary node: interpolation weights of those four
+//! precursor nodes. A weight0PP >= 1e6 disables the interpolation and the 0PP precursor value is used directly
+//! (NOTE(review): presumably marks a boundary node that coincides with a precursor node - confirm with host code)
+//! \param vLast, vCurrent precursor velocities, stored as [x block | y block | z block]
+//! \param velocityX, velocityY, velocityZ constant velocity added to the interpolated precursor velocity
+//! \param timeRatio blending factor between vLast (factor 1 - timeRatio) and vCurrent (factor timeRatio)
+//! \param velocityRatio divisor applied to the resulting velocity
+//! \param numberOfLBnodes number of lattice nodes, handed to getPointersToDistributions
+//! \param isEvenTimestep the distribution pointers are requested for the negated value of this flag
+__global__ void PrecursorDeviceEQ27(
+    int *subgridDistanceIndices,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    real omega,
+    real* distributions,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* vLast,
+    real* vCurrent,
+    real velocityX,
+    real velocityY,
+    real velocityZ,
+    real timeRatio,
+    real velocityRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    if(nodeIndex>=numberOfBCnodes) return;
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Interpolate the precursor velocity in space, separately for the last and the current snapshot
+    //!
+    real vxLastInterpd, vyLastInterpd, vzLastInterpd;
+    real vxNextInterpd, vyNextInterpd, vzNextInterpd;
+
+    uint kNeighbor0PP = neighbors0PP[nodeIndex];
+    real d0PP = weights0PP[nodeIndex];
+
+    // the three velocity components are stored one after another, numberOfPrecursorNodes values each
+    real* vxLast = vLast;
+    real* vyLast = &vLast[numberOfPrecursorNodes];
+    real* vzLast = &vLast[2*numberOfPrecursorNodes];
+
+    real* vxCurrent = vCurrent;
+    real* vyCurrent = &vCurrent[numberOfPrecursorNodes];
+    real* vzCurrent = &vCurrent[2*numberOfPrecursorNodes];
+
+    if(d0PP < 1e6)
+    {
+        // weighted average of the four precursor nodes, normalized by the sum of the weights
+        uint kNeighbor0PM = neighbors0PM[nodeIndex];
+        uint kNeighbor0MP = neighbors0MP[nodeIndex];
+        uint kNeighbor0MM = neighbors0MM[nodeIndex];
+
+        real d0PM = weights0PM[nodeIndex];
+        real d0MP = weights0MP[nodeIndex];
+        real d0MM = weights0MM[nodeIndex];
+
+        real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM);
+
+        vxLastInterpd = (vxLast[kNeighbor0PP]*d0PP + vxLast[kNeighbor0PM]*d0PM + vxLast[kNeighbor0MP]*d0MP + vxLast[kNeighbor0MM]*d0MM)*invWeightSum;
+        vyLastInterpd = (vyLast[kNeighbor0PP]*d0PP + vyLast[kNeighbor0PM]*d0PM + vyLast[kNeighbor0MP]*d0MP + vyLast[kNeighbor0MM]*d0MM)*invWeightSum;
+        vzLastInterpd = (vzLast[kNeighbor0PP]*d0PP + vzLast[kNeighbor0PM]*d0PM + vzLast[kNeighbor0MP]*d0MP + vzLast[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        vxNextInterpd = (vxCurrent[kNeighbor0PP]*d0PP + vxCurrent[kNeighbor0PM]*d0PM + vxCurrent[kNeighbor0MP]*d0MP + vxCurrent[kNeighbor0MM]*d0MM)*invWeightSum;
+        vyNextInterpd = (vyCurrent[kNeighbor0PP]*d0PP + vyCurrent[kNeighbor0PM]*d0PM + vyCurrent[kNeighbor0MP]*d0MP + vyCurrent[kNeighbor0MM]*d0MM)*invWeightSum;
+        vzNextInterpd = (vzCurrent[kNeighbor0PP]*d0PP + vzCurrent[kNeighbor0PM]*d0PM + vzCurrent[kNeighbor0MP]*d0MP + vzCurrent[kNeighbor0MM]*d0MM)*invWeightSum;
+    }
+    else
+    {
+        // no spatial interpolation: take the value of the 0PP precursor node as it is
+        vxLastInterpd = vxLast[kNeighbor0PP];
+        vyLastInterpd = vyLast[kNeighbor0PP];
+        vzLastInterpd = vzLast[kNeighbor0PP];
+
+        vxNextInterpd = vxCurrent[kNeighbor0PP];
+        vyNextInterpd = vyCurrent[kNeighbor0PP];
+        vzNextInterpd = vzCurrent[kNeighbor0PP];
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Blend both snapshots linearly in time, add the constant velocity and divide by velocityRatio
+    //!
+    real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio;
+    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio;
+    real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio;
+
+    //////////////////////////////////////////////////////////////////////////
+    //! - Get pointers to the distributions: style of reading and writing the distributions from/to stored arrays
+    //! dependent on timestep is based on the esoteric twist algorithm
+    //! \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+    //! DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Set the storage indices of the 27 distributions of this node
+    //!
+    unsigned int KQK  = subgridDistanceIndices[nodeIndex]; //QK
+    unsigned int k000 = KQK; //000
+    unsigned int kP00 = KQK; //P00
+    unsigned int kM00 = neighborX[KQK]; //M00
+    unsigned int k0P0   = KQK; //n
+    unsigned int k0M0   = neighborY[KQK]; //s
+    unsigned int k00P   = KQK; //t
+    unsigned int k00M   = neighborZ[KQK]; //b
+    unsigned int kMM0  = neighborY[kM00]; //sw
+    unsigned int kPP0  = KQK; //ne
+    unsigned int kPM0  = k0M0; //se
+    unsigned int kMP0  = kM00; //nw
+    unsigned int kM0M  = neighborZ[kM00]; //bw
+    unsigned int kP0P  = KQK; //te
+    unsigned int kP0M  = k00M; //be
+    unsigned int k0PP  = KQK; //tn
+    unsigned int k0MM  = neighborZ[k0M0]; //bs
+    unsigned int kM0P  = kM00; //tw
+    unsigned int k0PM  = k00M; //bn
+    unsigned int k0MP  = k0M0; //ts
+    unsigned int kPMP = k0M0; //tse
+    unsigned int kMPM = kM0M; //bnw
+    unsigned int kMPP = kM00; //tnw
+    unsigned int kPMM = k0MM; //bse
+    unsigned int kMMP = kMM0; //tsw
+    unsigned int kPPM = k00M; //bne
+    unsigned int kPPP = KQK; //tne
+    unsigned int kMMM = neighborZ[kMM0]; //bsw
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Set macroscopic quantities: zero density fluctuation and the interpolated precursor velocity.
+    //! The distributions currently stored at the node are not read, they are replaced completely.
+    //!
+    const real drho = c0o1;
+    const real vx1  = VeloX;
+    const real vx2  = VeloY;
+    const real vx3  = VeloZ;
+
+    const real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Compute the equilibrium distribution for every direction
+    //!
+    const real f_000 = c8o27* (drho-(drho+c1o1)*cusq);
+    const real f_P00 = c2o27* (drho+(drho+c1o1)*(c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cusq));
+    const real f_M00 = c2o27* (drho+(drho+c1o1)*(c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cusq));
+    const real f_0P0 = c2o27* (drho+(drho+c1o1)*(c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cusq));
+    const real f_0M0 = c2o27* (drho+(drho+c1o1)*(c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cusq));
+    const real f_00P = c2o27* (drho+(drho+c1o1)*(c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cusq));
+    const real f_00M = c2o27* (drho+(drho+c1o1)*(c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cusq));
+    const real f_PP0 = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cusq));
+    const real f_MM0 = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cusq));
+    const real f_PM0 = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cusq));
+    const real f_MP0 = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cusq));
+    const real f_P0P = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cusq));
+    const real f_M0M = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cusq));
+    const real f_P0M = c1o54* (drho+(drho+c1o1)*(c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cusq));
+    const real f_M0P = c1o54* (drho+(drho+c1o1)*(c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cusq));
+    const real f_0PP = c1o54* (drho+(drho+c1o1)*(c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cusq));
+    const real f_0MM = c1o54* (drho+(drho+c1o1)*(c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cusq));
+    const real f_0PM = c1o54* (drho+(drho+c1o1)*(c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cusq));
+    const real f_0MP = c1o54* (drho+(drho+c1o1)*(c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cusq));
+    const real f_PPP = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq));
+    const real f_MMM = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq));
+    const real f_PPM = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq));
+    const real f_MMP = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq));
+    const real f_PMP = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq));
+    const real f_MPM = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq));
+    const real f_PMM = c1o216*(drho+(drho+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
+    const real f_MPP = c1o216*(drho+(drho+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Write the new distributions to the bc nodes. Every value is stored under the name of its opposite
+    //! direction, because the pointers were requested for the negated timestep parity (see above).
+    //!
+    (dist.f[DIR_P00])[kP00] = f_M00;
+    (dist.f[DIR_PP0])[kPP0] = f_MM0;
+    (dist.f[DIR_P0M])[kP0M] = f_M0P;
+    (dist.f[DIR_PM0])[kPM0] = f_MP0;
+    (dist.f[DIR_PMP])[kPMP] = f_MPM;
+    (dist.f[DIR_P0P])[kP0P] = f_M0M;
+    (dist.f[DIR_PPM])[kPPM] = f_MMP;
+    (dist.f[DIR_PPP])[kPPP] = f_MMM;
+    (dist.f[DIR_PMM])[kPMM] = f_MPP;
+
+    (dist.f[DIR_M00])[kM00] = f_P00;
+    (dist.f[DIR_MM0])[kMM0] = f_PP0;
+    (dist.f[DIR_M0M])[kM0M] = f_P0P;
+    (dist.f[DIR_MP0])[kMP0] = f_PM0;
+    (dist.f[DIR_M0P])[kM0P] = f_P0M;
+    (dist.f[DIR_MMM])[kMMM] = f_PPP;
+    (dist.f[DIR_MMP])[kMMP] = f_PPM;
+    (dist.f[DIR_MPP])[kMPP] = f_PMM;
+    (dist.f[DIR_MPM])[kMPM] = f_PMP;
+
+    (dist.f[DIR_0P0])[k0P0] = f_0M0;
+    (dist.f[DIR_0M0])[k0M0] = f_0P0;
+    (dist.f[DIR_00P])[k00P] = f_00M;
+    (dist.f[DIR_00M])[k00M] = f_00P;
+    (dist.f[DIR_0PP])[k0PP] = f_0MM;
+    (dist.f[DIR_0MM])[k0MM] = f_0PP;
+    (dist.f[DIR_0PM])[k0PM] = f_0MP;
+    (dist.f[DIR_0MP])[k0MP] = f_0PM;
+    (dist.f[DIR_000])[k000] = f_000;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//! \brief Precursor boundary condition that overwrites nine distributions of a boundary node (P00, PP0, PM0, P0P,
+//! P0M, PPP, PMP, PPM, PMM) with values interpolated in space and time from two precursor snapshots.
+//!
+//! fsLast and fsNext each hold nine consecutive blocks of numberOfPrecursorNodes values, one block per written
+//! distribution in the order given above. neighborX is not used by this kernel.
+__global__ void PrecursorDeviceDistributions(
+    int *subgridDistanceIndices,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    real* distributions,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* fsLast,
+    real* fsNext,
+    real timeRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - One thread per boundary node, surplus threads leave immediately
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    if(nodeIndex>=numberOfBCnodes) return;
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Collect the precursor nodes and weights used for the spatial interpolation
+    //!
+    const uint source0PP = neighbors0PP[nodeIndex];
+    const real weight0PP = weights0PP[nodeIndex];
+
+    // A weight of 1e6 or more switches the four-point average off; the other three
+    // neighbors and weights are then never loaded.
+    const bool useFourPoints = weight0PP<1e6;
+
+    uint source0PM = 0;
+    uint source0MP = 0;
+    uint source0MM = 0;
+    real weight0PM = 0;
+    real weight0MP = 0;
+    real weight0MM = 0;
+    real invWeightSum = 0;
+
+    if(useFourPoints)
+    {
+        source0PM = neighbors0PM[nodeIndex];
+        source0MP = neighbors0MP[nodeIndex];
+        source0MM = neighbors0MM[nodeIndex];
+
+        weight0PM = weights0PM[nodeIndex];
+        weight0MP = weights0MP[nodeIndex];
+        weight0MM = weights0MM[nodeIndex];
+
+        invWeightSum = 1.f/(weight0PP+weight0PM+weight0MP+weight0MM);
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Interpolate one stored distribution: first in space within each snapshot, then linearly in time
+    //!
+    auto interpolateComponent = [&](int component) -> real
+    {
+        // start of this component's block inside the two snapshots
+        const real* last = &fsLast[component*numberOfPrecursorNodes];
+        const real* next = &fsNext[component*numberOfPrecursorNodes];
+
+        real lastInterp;
+        real nextInterp;
+        if(useFourPoints)
+        {
+            lastInterp = (last[source0PP]*weight0PP + last[source0PM]*weight0PM + last[source0MP]*weight0MP + last[source0MM]*weight0MM)*invWeightSum;
+            nextInterp = (next[source0PP]*weight0PP + next[source0PM]*weight0PM + next[source0MP]*weight0MP + next[source0MM]*weight0MM)*invWeightSum;
+        }
+        else
+        {
+            lastInterp = last[source0PP];
+            nextInterp = next[source0PP];
+        }
+        return lastInterp*(1.f-timeRatio) + nextInterp*timeRatio;
+    };
+
+    const real fP00 = interpolateComponent(0);
+    const real fPP0 = interpolateComponent(1);
+    const real fPM0 = interpolateComponent(2);
+    const real fP0P = interpolateComponent(3);
+    const real fP0M = interpolateComponent(4);
+    const real fPPP = interpolateComponent(5);
+    const real fPMP = interpolateComponent(6);
+    const real fPPM = interpolateComponent(7);
+    const real fPMM = interpolateComponent(8);
+
+    //////////////////////////////////////////////////////////////////////////
+    //! - Get pointers to the distributions: style of reading and writing the distributions from/to stored arrays
+    //! dependent on timestep is based on the esoteric twist algorithm
+    //! \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+    //! DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Storage indices: the node itself and the nodes reached through neighborY, neighborZ and both
+    //!
+    const unsigned int kNode  = subgridDistanceIndices[nodeIndex];
+    const unsigned int kViaY  = neighborY[kNode];
+    const unsigned int kViaZ  = neighborZ[kNode];
+    const unsigned int kViaYZ = neighborZ[kViaY];
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Write the interpolated distributions to the bc node
+    //!
+    dist.f[DIR_P00][kNode]  = fP00;
+    dist.f[DIR_PP0][kNode]  = fPP0;
+    dist.f[DIR_PM0][kViaY]  = fPM0;
+    dist.f[DIR_P0P][kNode]  = fP0P;
+    dist.f[DIR_P0M][kViaZ]  = fP0M;
+    dist.f[DIR_PPP][kNode]  = fPPP;
+    dist.f[DIR_PMP][kViaY]  = fPMP;
+    dist.f[DIR_PPM][kViaZ]  = fPPM;
+    dist.f[DIR_PMM][kViaYZ] = fPMM;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// NOTE: Has not been tested after bug fix!
+__global__ void QPrecursorDeviceDistributions(
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    int sizeQ,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    real* distributions,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* fsLast,
+    real* fsNext,
+    real timeRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    if(nodeIndex>=numberOfBCnodes) return;
+
+    uint kNeighbor0PP = neighbors0PP[nodeIndex];
+    real d0PP = weights0PP[nodeIndex];
+
+    real f0LastInterp, f1LastInterp, f2LastInterp, f3LastInterp, f4LastInterp, f5LastInterp, f6LastInterp, f7LastInterp, f8LastInterp;
+    real f0NextInterp, f1NextInterp, f2NextInterp, f3NextInterp, f4NextInterp, f5NextInterp, f6NextInterp, f7NextInterp, f8NextInterp;
+
+    real* f0Last = fsLast;
+    real* f1Last = &fsLast[  numberOfPrecursorNodes];
+    real* f2Last = &fsLast[2*numberOfPrecursorNodes];
+    real* f3Last = &fsLast[3*numberOfPrecursorNodes];
+    real* f4Last = &fsLast[4*numberOfPrecursorNodes];
+    real* f5Last = &fsLast[5*numberOfPrecursorNodes];
+    real* f6Last = &fsLast[6*numberOfPrecursorNodes];
+    real* f7Last = &fsLast[7*numberOfPrecursorNodes];
+    real* f8Last = &fsLast[8*numberOfPrecursorNodes];
+
+    real* f0Next = fsNext;
+    real* f1Next = &fsNext[  numberOfPrecursorNodes];
+    real* f2Next = &fsNext[2*numberOfPrecursorNodes];
+    real* f3Next = &fsNext[3*numberOfPrecursorNodes];
+    real* f4Next = &fsNext[4*numberOfPrecursorNodes];
+    real* f5Next = &fsNext[5*numberOfPrecursorNodes];
+    real* f6Next = &fsNext[6*numberOfPrecursorNodes];
+    real* f7Next = &fsNext[7*numberOfPrecursorNodes];
+    real* f8Next = &fsNext[8*numberOfPrecursorNodes];
+
+
+    if(d0PP<1e6)
+    {
+        uint kNeighbor0PM = neighbors0PM[nodeIndex];
+        uint kNeighbor0MP = neighbors0MP[nodeIndex];
+        uint kNeighbor0MM = neighbors0MM[nodeIndex];
+
+        real d0PM = weights0PM[nodeIndex];
+        real d0MP = weights0MP[nodeIndex];
+        real d0MM = weights0MM[nodeIndex];
+
+        real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM);
+
+        f0LastInterp = (f0Last[kNeighbor0PP]*d0PP + f0Last[kNeighbor0PM]*d0PM + f0Last[kNeighbor0MP]*d0MP + f0Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f0NextInterp = (f0Next[kNeighbor0PP]*d0PP + f0Next[kNeighbor0PM]*d0PM + f0Next[kNeighbor0MP]*d0MP + f0Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f1LastInterp = (f1Last[kNeighbor0PP]*d0PP + f1Last[kNeighbor0PM]*d0PM + f1Last[kNeighbor0MP]*d0MP + f1Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f1NextInterp = (f1Next[kNeighbor0PP]*d0PP + f1Next[kNeighbor0PM]*d0PM + f1Next[kNeighbor0MP]*d0MP + f1Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f2LastInterp = (f2Last[kNeighbor0PP]*d0PP + f2Last[kNeighbor0PM]*d0PM + f2Last[kNeighbor0MP]*d0MP + f2Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f2NextInterp = (f2Next[kNeighbor0PP]*d0PP + f2Next[kNeighbor0PM]*d0PM + f2Next[kNeighbor0MP]*d0MP + f2Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f3LastInterp = (f3Last[kNeighbor0PP]*d0PP + f3Last[kNeighbor0PM]*d0PM + f3Last[kNeighbor0MP]*d0MP + f3Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f3NextInterp = (f3Next[kNeighbor0PP]*d0PP + f3Next[kNeighbor0PM]*d0PM + f3Next[kNeighbor0MP]*d0MP + f3Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f4LastInterp = (f4Last[kNeighbor0PP]*d0PP + f4Last[kNeighbor0PM]*d0PM + f4Last[kNeighbor0MP]*d0MP + f4Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f4NextInterp = (f4Next[kNeighbor0PP]*d0PP + f4Next[kNeighbor0PM]*d0PM + f4Next[kNeighbor0MP]*d0MP + f4Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f5LastInterp = (f5Last[kNeighbor0PP]*d0PP + f5Last[kNeighbor0PM]*d0PM + f5Last[kNeighbor0MP]*d0MP + f5Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f5NextInterp = (f5Next[kNeighbor0PP]*d0PP + f5Next[kNeighbor0PM]*d0PM + f5Next[kNeighbor0MP]*d0MP + f5Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f6LastInterp = (f6Last[kNeighbor0PP]*d0PP + f6Last[kNeighbor0PM]*d0PM + f6Last[kNeighbor0MP]*d0MP + f6Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f6NextInterp = (f6Next[kNeighbor0PP]*d0PP + f6Next[kNeighbor0PM]*d0PM + f6Next[kNeighbor0MP]*d0MP + f6Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f7LastInterp = (f7Last[kNeighbor0PP]*d0PP + f7Last[kNeighbor0PM]*d0PM + f7Last[kNeighbor0MP]*d0MP + f7Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f7NextInterp = (f7Next[kNeighbor0PP]*d0PP + f7Next[kNeighbor0PM]*d0PM + f7Next[kNeighbor0MP]*d0MP + f7Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+        f8LastInterp = (f8Last[kNeighbor0PP]*d0PP + f8Last[kNeighbor0PM]*d0PM + f8Last[kNeighbor0MP]*d0MP + f8Last[kNeighbor0MM]*d0MM)*invWeightSum;
+        f8NextInterp = (f8Next[kNeighbor0PP]*d0PP + f8Next[kNeighbor0PM]*d0PM + f8Next[kNeighbor0MP]*d0MP + f8Next[kNeighbor0MM]*d0MM)*invWeightSum;
+
+    } else {
+        f0LastInterp = f0Last[kNeighbor0PP];
+        f1LastInterp = f1Last[kNeighbor0PP];
+        f2LastInterp = f2Last[kNeighbor0PP];
+        f3LastInterp = f3Last[kNeighbor0PP];
+        f4LastInterp = f4Last[kNeighbor0PP];
+        f5LastInterp = f5Last[kNeighbor0PP];
+        f6LastInterp = f6Last[kNeighbor0PP];
+        f7LastInterp = f7Last[kNeighbor0PP];
+        f8LastInterp = f8Last[kNeighbor0PP];
+
+        f0NextInterp = f0Next[kNeighbor0PP];
+        f1NextInterp = f1Next[kNeighbor0PP];
+        f2NextInterp = f2Next[kNeighbor0PP];
+        f3NextInterp = f3Next[kNeighbor0PP];
+        f4NextInterp = f4Next[kNeighbor0PP];
+        f5NextInterp = f5Next[kNeighbor0PP];
+        f6NextInterp = f6Next[kNeighbor0PP];
+        f7NextInterp = f7Next[kNeighbor0PP];
+        f8NextInterp = f8Next[kNeighbor0PP];
+    }
+    //////////////////////////////////////////////////////////////////////////
+    //! - Set up the distribution pointers for writing: the style of reading and writing the distributions from/to stored
+    //! arrays depends on the timestep and is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
+    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>. Note that the timestep parity is passed inverted (!isEvenTimestep).
+    //!
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+
+    unsigned int KQK  = subgridDistanceIndices[nodeIndex];
+    // unsigned int k000= KQK;
+    unsigned int kP00   = KQK;
+    // unsigned int kM00   = neighborX[KQK];
+    // unsigned int k0P0   = KQK;
+    unsigned int k0M0   = neighborY[KQK];
+    // unsigned int k00P   = KQK;
+    unsigned int k00M   = neighborZ[KQK];
+    // unsigned int kMM0  = neighborY[kM00];
+    unsigned int kPP0  = KQK;
+    unsigned int kPM0  = k0M0;
+    // unsigned int kMP0  = kM00;
+    // unsigned int kM0M  = neighborZ[kM00];
+    unsigned int kP0P  = KQK;
+    unsigned int kP0M  = k00M;
+    // unsigned int kM0P  = kM00;
+    unsigned int k0MM  = neighborZ[k0M0];
+    // unsigned int k0PM  = k00M;
+    // unsigned int k0MP  = k0M0;
+    unsigned int kPMP = k0M0;
+    // unsigned int kMPM = kM0M;
+    // unsigned int kMPP = kM00;
+    unsigned int kPMM = k0MM;
+    // unsigned int kMMP = kMM0;
+    unsigned int kPPM = k00M;
+    unsigned int kPPP = KQK;
+    // unsigned int kMMM = neighborZ[kMM0];
+    SubgridDistances27 qs;
+    getPointersToSubgridDistances(qs, subgridDistances, sizeQ);
+
+    real q;
+    q = qs.q[DIR_P00][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][kP00] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio;
+    q = qs.q[DIR_PP0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kPP0] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio;
+    q = qs.q[DIR_PM0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kPM0] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio;
+    q = qs.q[DIR_P0P][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kP0P] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio;
+    q = qs.q[DIR_P0M][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kP0M] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio;
+    q = qs.q[DIR_PPP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][kPPP] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio;
+    q = qs.q[DIR_PMP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][kPMP] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio;
+    q = qs.q[DIR_PPM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kPPM] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio;
+    q = qs.q[DIR_PMM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kPMM] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio;
+
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index ccb2ce79c63515e59e4f9ae75016f44ced71a170..02cfd2bce3723162b645cef568c87ca3b1dd2720 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -1,29 +1,63 @@
-/* Device code */
-#include "LBM/LB.h" 
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PressBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
+#include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
-#include "KernelUtilities.h"
+#include "lbm/MacroscopicQuantities.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QInflowScaleByPressDevice27(  real* rhoBC,
-														 real* DD, 
-														 int* k_Q, 
-														 int* k_N, 
-														 int numberOfBCnodes, 
-														 real om1, 
-														 unsigned int* neighborX,
-														 unsigned int* neighborY,
-														 unsigned int* neighborZ,
-														 unsigned int size_Mat, 
-														 bool isEvenTimestep)
+__global__ void QInflowScaleByPressDevice27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -97,141 +131,141 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f1_E    = (D.f[DIR_P00   ])[k1e   ];
-      real f1_W    = (D.f[DIR_M00   ])[k1w   ];
-      real f1_N    = (D.f[DIR_0P0   ])[k1n   ];
-      real f1_S    = (D.f[DIR_0M0   ])[k1s   ];
-      real f1_T    = (D.f[DIR_00P   ])[k1t   ];
-      real f1_B    = (D.f[DIR_00M   ])[k1b   ];
-      real f1_NE   = (D.f[DIR_PP0  ])[k1ne  ];
-      real f1_SW   = (D.f[DIR_MM0  ])[k1sw  ];
-      real f1_SE   = (D.f[DIR_PM0  ])[k1se  ];
-      real f1_NW   = (D.f[DIR_MP0  ])[k1nw  ];
-      real f1_TE   = (D.f[DIR_P0P  ])[k1te  ];
-      real f1_BW   = (D.f[DIR_M0M  ])[k1bw  ];
-      real f1_BE   = (D.f[DIR_P0M  ])[k1be  ];
-      real f1_TW   = (D.f[DIR_M0P  ])[k1tw  ];
-      real f1_TN   = (D.f[DIR_0PP  ])[k1tn  ];
-      real f1_BS   = (D.f[DIR_0MM  ])[k1bs  ];
-      real f1_BN   = (D.f[DIR_0PM  ])[k1bn  ];
-      real f1_TS   = (D.f[DIR_0MP  ])[k1ts  ];
+      real f1_E    = (D.f[DIR_P00])[k1e   ];
+      real f1_W    = (D.f[DIR_M00])[k1w   ];
+      real f1_N    = (D.f[DIR_0P0])[k1n   ];
+      real f1_S    = (D.f[DIR_0M0])[k1s   ];
+      real f1_T    = (D.f[DIR_00P])[k1t   ];
+      real f1_B    = (D.f[DIR_00M])[k1b   ];
+      real f1_NE   = (D.f[DIR_PP0])[k1ne  ];
+      real f1_SW   = (D.f[DIR_MM0])[k1sw  ];
+      real f1_SE   = (D.f[DIR_PM0])[k1se  ];
+      real f1_NW   = (D.f[DIR_MP0])[k1nw  ];
+      real f1_TE   = (D.f[DIR_P0P])[k1te  ];
+      real f1_BW   = (D.f[DIR_M0M])[k1bw  ];
+      real f1_BE   = (D.f[DIR_P0M])[k1be  ];
+      real f1_TW   = (D.f[DIR_M0P])[k1tw  ];
+      real f1_TN   = (D.f[DIR_0PP])[k1tn  ];
+      real f1_BS   = (D.f[DIR_0MM])[k1bs  ];
+      real f1_BN   = (D.f[DIR_0PM])[k1bn  ];
+      real f1_TS   = (D.f[DIR_0MP])[k1ts  ];
       //real f1_ZERO = (D.f[DIR_000])[k1zero];
-      real f1_TNE  = (D.f[DIR_PPP ])[k1tne ];
-      real f1_TSW  = (D.f[DIR_MMP ])[k1tsw ];
-      real f1_TSE  = (D.f[DIR_PMP ])[k1tse ];
-      real f1_TNW  = (D.f[DIR_MPP ])[k1tnw ];
-      real f1_BNE  = (D.f[DIR_PPM ])[k1bne ];
-      real f1_BSW  = (D.f[DIR_MMM ])[k1bsw ];
-      real f1_BSE  = (D.f[DIR_PMM ])[k1bse ];
-      real f1_BNW  = (D.f[DIR_MPM ])[k1bnw ];
+      real f1_TNE  = (D.f[DIR_PPP])[k1tne ];
+      real f1_TSW  = (D.f[DIR_MMP])[k1tsw ];
+      real f1_TSE  = (D.f[DIR_PMP])[k1tse ];
+      real f1_TNW  = (D.f[DIR_MPP])[k1tnw ];
+      real f1_BNE  = (D.f[DIR_PPM])[k1bne ];
+      real f1_BSW  = (D.f[DIR_MMM])[k1bsw ];
+      real f1_BSE  = (D.f[DIR_PMM])[k1bse ];
+      real f1_BNW  = (D.f[DIR_MPM])[k1bnw ];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f_E    = (D.f[DIR_P00   ])[ke   ];
-      real f_W    = (D.f[DIR_M00   ])[kw   ];
-      real f_N    = (D.f[DIR_0P0   ])[kn   ];
-      real f_S    = (D.f[DIR_0M0   ])[ks   ];
-      real f_T    = (D.f[DIR_00P   ])[kt   ];
-      real f_B    = (D.f[DIR_00M   ])[kb   ];
-      real f_NE   = (D.f[DIR_PP0  ])[kne  ];
-      real f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_SE   = (D.f[DIR_PM0  ])[kse  ];
-      real f_NW   = (D.f[DIR_MP0  ])[knw  ];
-      real f_TE   = (D.f[DIR_P0P  ])[kte  ];
-      real f_BW   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_BE   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_BS   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_BN   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_TS   = (D.f[DIR_0MP  ])[kts  ];
+      real f_E    = (D.f[DIR_P00])[ke   ];
+      real f_W    = (D.f[DIR_M00])[kw   ];
+      real f_N    = (D.f[DIR_0P0])[kn   ];
+      real f_S    = (D.f[DIR_0M0])[ks   ];
+      real f_T    = (D.f[DIR_00P])[kt   ];
+      real f_B    = (D.f[DIR_00M])[kb   ];
+      real f_NE   = (D.f[DIR_PP0])[kne  ];
+      real f_SW   = (D.f[DIR_MM0])[ksw  ];
+      real f_SE   = (D.f[DIR_PM0])[kse  ];
+      real f_NW   = (D.f[DIR_MP0])[knw  ];
+      real f_TE   = (D.f[DIR_P0P])[kte  ];
+      real f_BW   = (D.f[DIR_M0M])[kbw  ];
+      real f_BE   = (D.f[DIR_P0M])[kbe  ];
+      real f_TW   = (D.f[DIR_M0P])[ktw  ];
+      real f_TN   = (D.f[DIR_0PP])[ktn  ];
+      real f_BS   = (D.f[DIR_0MM])[kbs  ];
+      real f_BN   = (D.f[DIR_0PM])[kbn  ];
+      real f_TS   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_TNE  = (D.f[DIR_PPP ])[ktne ];
-      real f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      real f_TSE  = (D.f[DIR_PMP ])[ktse ];
-      real f_TNW  = (D.f[DIR_MPP ])[ktnw ];
-      real f_BNE  = (D.f[DIR_PPM ])[kbne ];
-      real f_BSW  = (D.f[DIR_MMM ])[kbsw ];
-      real f_BSE  = (D.f[DIR_PMM ])[kbse ];
-      real f_BNW  = (D.f[DIR_MPM ])[kbnw ];
+      real f_TNE  = (D.f[DIR_PPP])[ktne ];
+      real f_TSW  = (D.f[DIR_MMP])[ktsw ];
+      real f_TSE  = (D.f[DIR_PMP])[ktse ];
+      real f_TNW  = (D.f[DIR_MPP])[ktnw ];
+      real f_BNE  = (D.f[DIR_PPM])[kbne ];
+      real f_BSW  = (D.f[DIR_MMM])[kbsw ];
+      real f_BSE  = (D.f[DIR_PMM])[kbse ];
+      real f_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       // real vx1, vx2, vx3;
       real drho, drho1;
       //////////////////////////////////////////////////////////////////////////
-	  //Dichte
+     // density
       drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
-                f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-                f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); 
+                f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW +
+                f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]);
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
+                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
       //////////////////////////////////////////////////////////////////////////
-	  //Schallgeschwindigkeit
-	  real cs = c1o1 / sqrtf(c3o1);
+     // speed of sound
+     real cs = c1o1 / sqrtf(c3o1);
       //////////////////////////////////////////////////////////////////////////
-	  real rhoInterpol = drho1 * cs + (c1o1 - cs) * drho; 
-	  //real diffRho = (rhoBC[k] + one) / (rhoInterpol + one);
-	  real diffRhoToAdd = rhoBC[k] - rhoInterpol;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //no velocity
-	  //////////////////////////////////////////
+     real rhoInterpol = drho1 * cs + (c1o1 - cs) * drho;
+     //real diffRho = (rhoBC[k] + one) / (rhoInterpol + one);
+     real diffRhoToAdd = rhoBC[k] - rhoInterpol;
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     //no velocity
+     //////////////////////////////////////////
       f_E    = f1_E   * cs + (c1o1 - cs) * f_E   ;
       f_W    = f1_W   * cs + (c1o1 - cs) * f_W   ;
       f_N    = f1_N   * cs + (c1o1 - cs) * f_N   ;
@@ -258,16 +292,16 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       f_BSW  = f1_BSW * cs + (c1o1 - cs) * f_BSW ;
       f_BSE  = f1_BSE * cs + (c1o1 - cs) * f_BSE ;
       f_BNW  = f1_BNW * cs + (c1o1 - cs) * f_BNW ;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //scale by press
-	  //////////////////////////////////////////
-	  //f_E    = (f_E   + c2over27 ) * diffRho - c2over27 ;
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     //scale by press
+     //////////////////////////////////////////
+     //f_E    = (f_E   + c2over27 ) * diffRho - c2over27 ;
    //   f_W    = (f_W   + c2over27 ) * diffRho - c2over27 ;
    //   f_N    = (f_N   + c2over27 ) * diffRho - c2over27 ;
    //   f_S    = (f_S   + c2over27 ) * diffRho - c2over27 ;
    //   f_T    = (f_T   + c2over27 ) * diffRho - c2over27 ;
    //   f_B    = (f_B   + c2over27 ) * diffRho - c2over27 ;
-	  //f_NE   = (f_NE  + c1over54 ) * diffRho - c1over54 ;
+     //f_NE   = (f_NE  + c1over54 ) * diffRho - c1over54 ;
    //   f_SW   = (f_SW  + c1over54 ) * diffRho - c1over54 ;
    //   f_SE   = (f_SE  + c1over54 ) * diffRho - c1over54 ;
    //   f_NW   = (f_NW  + c1over54 ) * diffRho - c1over54 ;
@@ -287,16 +321,16 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
    //   f_BSW  = (f_BSW + c1over216) * diffRho - c1over216;
    //   f_BSE  = (f_BSE + c1over216) * diffRho - c1over216;
    //   f_BNW  = (f_BNW + c1over216) * diffRho - c1over216;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  // add press
-	  //////////////////////////////////////////
-	  f_E    = (f_E   + c2o27  * diffRhoToAdd);
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     // add press
+     //////////////////////////////////////////
+     f_E    = (f_E   + c2o27  * diffRhoToAdd);
       f_W    = (f_W   + c2o27  * diffRhoToAdd);
       f_N    = (f_N   + c2o27  * diffRhoToAdd);
       f_S    = (f_S   + c2o27  * diffRhoToAdd);
       f_T    = (f_T   + c2o27  * diffRhoToAdd);
       f_B    = (f_B   + c2o27  * diffRhoToAdd);
-	  f_NE   = (f_NE  + c1o54  * diffRhoToAdd);
+     f_NE   = (f_NE  + c1o54  * diffRhoToAdd);
       f_SW   = (f_SW  + c1o54  * diffRhoToAdd);
       f_SE   = (f_SE  + c1o54  * diffRhoToAdd);
       f_NW   = (f_NW  + c1o54  * diffRhoToAdd);
@@ -316,111 +350,111 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       f_BSW  = (f_BSW + c1o216 * diffRhoToAdd);
       f_BSE  = (f_BSE + c1o216 * diffRhoToAdd);
       f_BNW  = (f_BNW + c1o216 * diffRhoToAdd);
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-	  //////////////////////////////////////////////////////////////////////////
+     //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////
       //__syncthreads();
-	  // -X
-	  //(D.f[DIR_P00   ])[ke   ] = f_E   ;
-	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-	  //(D.f[DIR_PP0  ])[kne  ] = f_NE  ;
-	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-	  //(D.f[DIR_P0P  ])[kte  ] = f_TE  ;
-	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-	  //(D.f[DIR_PPP ])[ktne ] = f_TNE ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;     
-	  // X
-	  (D.f[DIR_M00   ])[kw   ] = f_W   ;
-	  (D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-	  (D.f[DIR_MP0  ])[knw  ] = f_NW  ;
-	  (D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-	  (D.f[DIR_M0P  ])[ktw  ] = f_TW  ;
-	  (D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-	  (D.f[DIR_MPP ])[ktnw ] = f_TNW ;
-	  (D.f[DIR_MMM ])[kbsw ] = f_BSW ;
-	  (D.f[DIR_MPM ])[kbnw ] = f_BNW ;     
-	  // Y
-	  //(D.f[DIR_0M0   ])[ks   ] = f_S   ;
-	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-	  //(D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-	  //(D.f[DIR_0MP  ])[kts  ] = f_TS  ;
-	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-	  //(D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
-	  // Z
-	  //(D.f[DIR_00M   ])[kb   ] = f_B   ;
-	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-	  //(D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-	  //(D.f[DIR_0PM  ])[kbn  ] = f_BN  ;
-	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
-	  //(D.f[DIR_MPM ])[kbnw ] = f_BNW ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
+     // -X
+     //(D.f[DIR_P00])[ke   ] = f_E   ;
+     //(D.f[DIR_PM0])[kse  ] = f_SE  ;
+     //(D.f[DIR_PP0])[kne  ] = f_NE  ;
+     //(D.f[DIR_P0M])[kbe  ] = f_BE  ;
+     //(D.f[DIR_P0P])[kte  ] = f_TE  ;
+     //(D.f[DIR_PMP])[ktse ] = f_TSE ;
+     //(D.f[DIR_PPP])[ktne ] = f_TNE ;
+     //(D.f[DIR_PMM])[kbse ] = f_BSE ;
+     //(D.f[DIR_PPM])[kbne ] = f_BNE ;
+     // X
+     (D.f[DIR_M00])[kw   ] = f_W   ;
+     (D.f[DIR_MM0])[ksw  ] = f_SW  ;
+     (D.f[DIR_MP0])[knw  ] = f_NW  ;
+     (D.f[DIR_M0M])[kbw  ] = f_BW  ;
+     (D.f[DIR_M0P])[ktw  ] = f_TW  ;
+     (D.f[DIR_MMP])[ktsw ] = f_TSW ;
+     (D.f[DIR_MPP])[ktnw ] = f_TNW ;
+     (D.f[DIR_MMM])[kbsw ] = f_BSW ;
+     (D.f[DIR_MPM])[kbnw ] = f_BNW ;
+     // Y
+     //(D.f[DIR_0M0])[ks   ] = f_S   ;
+     //(D.f[DIR_PM0])[kse  ] = f_SE  ;
+     //(D.f[DIR_MM0])[ksw  ] = f_SW  ;
+     //(D.f[DIR_0MP])[kts  ] = f_TS  ;
+     //(D.f[DIR_0MM])[kbs  ] = f_BS  ;
+     //(D.f[DIR_PMP])[ktse ] = f_TSE ;
+     //(D.f[DIR_MMP])[ktsw ] = f_TSW ;
+     //(D.f[DIR_PMM])[kbse ] = f_BSE ;
+     //(D.f[DIR_MMM])[kbsw ] = f_BSW ;
+     // Z
+     //(D.f[DIR_00M])[kb   ] = f_B   ;
+     //(D.f[DIR_P0M])[kbe  ] = f_BE  ;
+     //(D.f[DIR_M0M])[kbw  ] = f_BW  ;
+     //(D.f[DIR_0PM])[kbn  ] = f_BN  ;
+     //(D.f[DIR_0MM])[kbs  ] = f_BS  ;
+     //(D.f[DIR_PPM])[kbne ] = f_BNE ;
+     //(D.f[DIR_MPM])[kbnw ] = f_BNW ;
+     //(D.f[DIR_PMM])[kbse ] = f_BSE ;
+     //(D.f[DIR_MMM])[kbsw ] = f_BSW ;
       //////////////////////////////////////////////////////////////////////////
    }
 }
@@ -465,22 +499,23 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceIncompNEQ27( real* rhoBC,
-													real* DD, 
-													int* k_Q, 
-													int* k_N, 
-													int numberOfBCnodes, 
-													real om1, 
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+__global__ void QPressDeviceIncompNEQ27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // global x-index
+   const unsigned  y = blockIdx.x;   // global y-index
+   const unsigned  z = blockIdx.y;   // global z-index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -554,112 +589,112 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true) //// ACHTUNG PREColl !!!!!!!!!!!!!!
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
                           f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
       real vx1      =  ((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
-						  ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
-						  (f1_E - f1_W); 
+                    ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
+                    (f1_E - f1_W);
 
 
       real vx2    =   (-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
-						 ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
-						 (f1_N - f1_S); 
+                   ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
+                   (f1_N - f1_S);
 
       real vx3    =   ((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
-						 (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
-						 (f1_T - f1_B); 
+                   (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
+                   (f1_T - f1_B);
 
       real cusq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
@@ -690,15 +725,15 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
       f1_BNW   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq));
       f1_BSE   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
       f1_TNW   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
-	   
-	  drho1 = rhoBC[k];
 
-	  //if(vx1 < zero){
-		 // vx1 *= 0.9;
-	  //}
-	  //if(vx2 < zero){
-		 // vx2 *= c1o10;//0.9;
-	  //}
+     drho1 = rhoBC[k];
+
+     //if(vx1 < zero){
+       // vx1 *= 0.9;
+     //}
+     //if(vx2 < zero){
+       // vx2 *= c1o10;//0.9;
+     //}
 
       f1_ZERO  += c8o27*  (drho1-(drho1+c1o1)*cusq);
       f1_E     += c2o27*  (drho1+(drho1+c1o1)*(c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cusq));
@@ -728,39 +763,39 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
       f1_BSE   +=  c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
       f1_TNW   +=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
 
-	  //drho1 = (drho1 + rhoBC[k])/2.f;
-	  //drho1 = drho1 - rhoBC[k];
+     //drho1 = (drho1 + rhoBC[k])/2.f;
+     //drho1 = drho1 - rhoBC[k];
       //////////////////////////////////////////////////////////////////////////
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = f1_W   ;  
-      (D.f[DIR_M00   ])[kw   ] = f1_E   ;	
-      (D.f[DIR_0P0   ])[kn   ] = f1_S   ;	
-      (D.f[DIR_0M0   ])[ks   ] = f1_N   ;	
-      (D.f[DIR_00P   ])[kt   ] = f1_B   ;	
-      (D.f[DIR_00M   ])[kb   ] = f1_T   ;	
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  ;	
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  ;	
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  ;	
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  ;	
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  ;	
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  ;	
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  ;	
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  ;	
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  ;	
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  ;	
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  ;	
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  ;	
-      (D.f[DIR_000])[kzero] = f1_ZERO;	
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW ;	
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE ;	
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW ;	
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE ;	
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW ;	
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE ;	
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW ;	
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE ;       
+      (D.f[DIR_P00])[ke   ] = f1_W   ;
+      (D.f[DIR_M00])[kw   ] = f1_E   ;
+      (D.f[DIR_0P0])[kn   ] = f1_S   ;
+      (D.f[DIR_0M0])[ks   ] = f1_N   ;
+      (D.f[DIR_00P])[kt   ] = f1_B   ;
+      (D.f[DIR_00M])[kb   ] = f1_T   ;
+      (D.f[DIR_PP0])[kne  ] = f1_SW  ;
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  ;
+      (D.f[DIR_PM0])[kse  ] = f1_NW  ;
+      (D.f[DIR_MP0])[knw  ] = f1_SE  ;
+      (D.f[DIR_P0P])[kte  ] = f1_BW  ;
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  ;
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  ;
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  ;
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  ;
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  ;
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  ;
+      (D.f[DIR_0MP])[kts  ] = f1_BN  ;
+      (D.f[DIR_000])[kzero] = f1_ZERO;
+      (D.f[DIR_PPP])[ktne ] = f1_BSW ;
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE ;
+      (D.f[DIR_PMP])[ktse ] = f1_BNW ;
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE ;
+      (D.f[DIR_PPM])[kbne ] = f1_TSW ;
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE ;
+      (D.f[DIR_PMM])[kbse ] = f1_TNW ;
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -804,54 +839,49 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceNEQ27(real* rhoBC,
-                                             real* distribution, 
-                                             int* bcNodeIndices,
-                                             int* bcNeighborIndices,
-                                             int numberOfBCnodes,
-                                             real omega1, 
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int numberOfLBnodes, 
-                                             bool isEvenTimestep)
+__global__ void QPressDeviceNEQ27(
+    real* rhoBC,
+    real* distributions,
+    int* bcNodeIndices,
+    int* bcNeighborIndices,
+    int numberOfBCnodes,
+    real omega1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   //////////////////////////////////////////////////////////////////////////
-	//! The pressure boundary condition is executed in the following steps
-	//!
-	////////////////////////////////////////////////////////////////////////////////
-	//! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-	//!
-   const unsigned x = threadIdx.x;    // global x-index 
-   const unsigned y = blockIdx.x;     // global y-index 
-   const unsigned z = blockIdx.y;     // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
+   ////////////////////////////////////////////////////////////////////////////////
+   //! The pressure boundary condition is executed in the following steps
+   //!
 
-   const unsigned k = nx*(ny*z + y) + x;
+   ////////////////////////////////////////////////////////////////////////////////
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
-   //////////////////////////////////////////////////////////////////////////
+   ////////////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
       //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
       //!
       Distributions27 dist;
-      getPointersToDistributions(dist, distribution, numberOfLBnodes, isEvenTimestep);
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local pressure
       //!
-      real rhoBClocal = rhoBC[k];
+      real rhoBClocal = rhoBC[nodeIndex];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int KQK  = bcNodeIndices[k];
+      unsigned int KQK  = bcNodeIndices[nodeIndex];
       unsigned int kzero= KQK;
       unsigned int ke   = KQK;
       unsigned int kw   = neighborX[KQK];
@@ -882,7 +912,7 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing) for neighboring node
       //!
-      unsigned int K1QK  = bcNeighborIndices[k];
+      unsigned int K1QK  = bcNeighborIndices[nodeIndex];
       unsigned int k1zero= K1QK;
       unsigned int k1e   = K1QK;
       unsigned int k1w   = neighborX[K1QK];
@@ -914,52 +944,52 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions for neighboring node
       //!
-      real f1_W    = (dist.f[DIR_P00   ])[k1e   ];
-      real f1_E    = (dist.f[DIR_M00   ])[k1w   ];
-      real f1_S    = (dist.f[DIR_0P0   ])[k1n   ];
-      real f1_N    = (dist.f[DIR_0M0   ])[k1s   ];
-      real f1_B    = (dist.f[DIR_00P   ])[k1t   ];
-      real f1_T    = (dist.f[DIR_00M   ])[k1b   ];
-      real f1_SW   = (dist.f[DIR_PP0  ])[k1ne  ];
-      real f1_NE   = (dist.f[DIR_MM0  ])[k1sw  ];
-      real f1_NW   = (dist.f[DIR_PM0  ])[k1se  ];
-      real f1_SE   = (dist.f[DIR_MP0  ])[k1nw  ];
-      real f1_BW   = (dist.f[DIR_P0P  ])[k1te  ];
-      real f1_TE   = (dist.f[DIR_M0M  ])[k1bw  ];
-      real f1_TW   = (dist.f[DIR_P0M  ])[k1be  ];
-      real f1_BE   = (dist.f[DIR_M0P  ])[k1tw  ];
-      real f1_BS   = (dist.f[DIR_0PP  ])[k1tn  ];
-      real f1_TN   = (dist.f[DIR_0MM  ])[k1bs  ];
-      real f1_TS   = (dist.f[DIR_0PM  ])[k1bn  ];
-      real f1_BN   = (dist.f[DIR_0MP  ])[k1ts  ];
+      real f1_W    = (dist.f[DIR_P00])[k1e   ];
+      real f1_E    = (dist.f[DIR_M00])[k1w   ];
+      real f1_S    = (dist.f[DIR_0P0])[k1n   ];
+      real f1_N    = (dist.f[DIR_0M0])[k1s   ];
+      real f1_B    = (dist.f[DIR_00P])[k1t   ];
+      real f1_T    = (dist.f[DIR_00M])[k1b   ];
+      real f1_SW   = (dist.f[DIR_PP0])[k1ne  ];
+      real f1_NE   = (dist.f[DIR_MM0])[k1sw  ];
+      real f1_NW   = (dist.f[DIR_PM0])[k1se  ];
+      real f1_SE   = (dist.f[DIR_MP0])[k1nw  ];
+      real f1_BW   = (dist.f[DIR_P0P])[k1te  ];
+      real f1_TE   = (dist.f[DIR_M0M])[k1bw  ];
+      real f1_TW   = (dist.f[DIR_P0M])[k1be  ];
+      real f1_BE   = (dist.f[DIR_M0P])[k1tw  ];
+      real f1_BS   = (dist.f[DIR_0PP])[k1tn  ];
+      real f1_TN   = (dist.f[DIR_0MM])[k1bs  ];
+      real f1_TS   = (dist.f[DIR_0PM])[k1bn  ];
+      real f1_BN   = (dist.f[DIR_0MP])[k1ts  ];
       real f1_ZERO = (dist.f[DIR_000])[k1zero];
-      real f1_BSW  = (dist.f[DIR_PPP ])[k1tne ];
-      real f1_BNE  = (dist.f[DIR_MMP ])[k1tsw ];
-      real f1_BNW  = (dist.f[DIR_PMP ])[k1tse ];
-      real f1_BSE  = (dist.f[DIR_MPP ])[k1tnw ];
-      real f1_TSW  = (dist.f[DIR_PPM ])[k1bne ];
-      real f1_TNE  = (dist.f[DIR_MMM ])[k1bsw ];
-      real f1_TNW  = (dist.f[DIR_PMM ])[k1bse ];
-      real f1_TSE  = (dist.f[DIR_MPM ])[k1bnw ];
+      real f1_BSW  = (dist.f[DIR_PPP])[k1tne ];
+      real f1_BNE  = (dist.f[DIR_MMP])[k1tsw ];
+      real f1_BNW  = (dist.f[DIR_PMP])[k1tse ];
+      real f1_BSE  = (dist.f[DIR_MPP])[k1tnw ];
+      real f1_TSW  = (dist.f[DIR_PPM])[k1bne ];
+      real f1_TNE  = (dist.f[DIR_MMM])[k1bsw ];
+      real f1_TNW  = (dist.f[DIR_PMM])[k1bse ];
+      real f1_TSE  = (dist.f[DIR_MPM])[k1bnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities (for neighboring node)
       //!
       real drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
-                   f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-                   f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[DIR_000])[kzero]); 
+                   f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW +
+                   f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[DIR_000])[kzero]); // NOTE(review): DIR_000 is read at kzero (BC node), not f1_ZERO (neighbor node, k1zero) like the other terms - confirm intended
 
       real vx1  = (((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                    ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
-                   (f1_E - f1_W)) / (c1o1 + drho1);          
+                   (f1_E - f1_W)) / (c1o1 + drho1);
 
       real vx2  = ((-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                    ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
-                   (f1_N - f1_S)) / (c1o1 + drho1); 
+                   (f1_N - f1_S)) / (c1o1 + drho1);
 
       real vx3  = (((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
                    (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
-                   (f1_T - f1_B)) / (c1o1 + drho1); 
+                   (f1_T - f1_B)) / (c1o1 + drho1);
 
       real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
@@ -1037,33 +1067,33 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! write the new distributions to the bc nodes
       //!
-      (dist.f[DIR_P00   ])[ke   ] = f1_W   ;
-      (dist.f[DIR_M00   ])[kw   ] = f1_E   ;
-      (dist.f[DIR_0P0   ])[kn   ] = f1_S   ;
-      (dist.f[DIR_0M0   ])[ks   ] = f1_N   ;
-      (dist.f[DIR_00P   ])[kt   ] = f1_B   ;
-      (dist.f[DIR_00M   ])[kb   ] = f1_T   ;
-      (dist.f[DIR_PP0  ])[kne  ] = f1_SW  ;
-      (dist.f[DIR_MM0  ])[ksw  ] = f1_NE  ;
-      (dist.f[DIR_PM0  ])[kse  ] = f1_NW  ;
-      (dist.f[DIR_MP0  ])[knw  ] = f1_SE  ;
-      (dist.f[DIR_P0P  ])[kte  ] = f1_BW  ;
-      (dist.f[DIR_M0M  ])[kbw  ] = f1_TE  ;
-      (dist.f[DIR_P0M  ])[kbe  ] = f1_TW  ;
-      (dist.f[DIR_M0P  ])[ktw  ] = f1_BE  ;
-      (dist.f[DIR_0PP  ])[ktn  ] = f1_BS  ;
-      (dist.f[DIR_0MM  ])[kbs  ] = f1_TN  ;
-      (dist.f[DIR_0PM  ])[kbn  ] = f1_TS  ;
-      (dist.f[DIR_0MP  ])[kts  ] = f1_BN  ;
+      (dist.f[DIR_P00])[ke   ] = f1_W   ;
+      (dist.f[DIR_M00])[kw   ] = f1_E   ;
+      (dist.f[DIR_0P0])[kn   ] = f1_S   ;
+      (dist.f[DIR_0M0])[ks   ] = f1_N   ;
+      (dist.f[DIR_00P])[kt   ] = f1_B   ;
+      (dist.f[DIR_00M])[kb   ] = f1_T   ;
+      (dist.f[DIR_PP0])[kne  ] = f1_SW  ;
+      (dist.f[DIR_MM0])[ksw  ] = f1_NE  ;
+      (dist.f[DIR_PM0])[kse  ] = f1_NW  ;
+      (dist.f[DIR_MP0])[knw  ] = f1_SE  ;
+      (dist.f[DIR_P0P])[kte  ] = f1_BW  ;
+      (dist.f[DIR_M0M])[kbw  ] = f1_TE  ;
+      (dist.f[DIR_P0M])[kbe  ] = f1_TW  ;
+      (dist.f[DIR_M0P])[ktw  ] = f1_BE  ;
+      (dist.f[DIR_0PP])[ktn  ] = f1_BS  ;
+      (dist.f[DIR_0MM])[kbs  ] = f1_TN  ;
+      (dist.f[DIR_0PM])[kbn  ] = f1_TS  ;
+      (dist.f[DIR_0MP])[kts  ] = f1_BN  ;
       (dist.f[DIR_000])[kzero] = f1_ZERO;
-      (dist.f[DIR_PPP ])[ktne ] = f1_BSW ;
-      (dist.f[DIR_MMP ])[ktsw ] = f1_BNE ;
-      (dist.f[DIR_PMP ])[ktse ] = f1_BNW ;
-      (dist.f[DIR_MPP ])[ktnw ] = f1_BSE ;
-      (dist.f[DIR_PPM ])[kbne ] = f1_TSW ;
-      (dist.f[DIR_MMM ])[kbsw ] = f1_TNE ;
-      (dist.f[DIR_PMM ])[kbse ] = f1_TNW ;
-      (dist.f[DIR_MPM ])[kbnw ] = f1_TSE ;
+      (dist.f[DIR_PPP])[ktne ] = f1_BSW ;
+      (dist.f[DIR_MMP])[ktsw ] = f1_BNE ;
+      (dist.f[DIR_PMP])[ktse ] = f1_BNW ;
+      (dist.f[DIR_MPP])[ktnw ] = f1_BSE ;
+      (dist.f[DIR_PPM])[kbne ] = f1_TSW ;
+      (dist.f[DIR_MMM])[kbsw ] = f1_TNE ;
+      (dist.f[DIR_PMM])[kbse ] = f1_TNW ;
+      (dist.f[DIR_MPM])[kbnw ] = f1_TSE ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1107,16 +1137,17 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LB_BC_Press_East27( int nx, 
-                                               int ny, 
-                                               int tz, 
-                                               unsigned int* bcMatD, 
-                                               unsigned int* neighborX,
-                                               unsigned int* neighborY,
-                                               unsigned int* neighborZ,
-                                               real* DD, 
-                                               unsigned int size_Mat, 
-                                               bool isEvenTimestep) 
+__global__ void LB_BC_Press_East27(
+    int nx,
+    int ny,
+    int tz,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    //thread-index
    int ty = blockIdx.x;
@@ -1124,9 +1155,9 @@ __global__ void LB_BC_Press_East27( int nx,
 
    int  k, k1, nxny;                   // Zugriff auf arrays im device
 
-   int  x = tx + STARTOFFX;  // Globaler x-Index 
-   int  y = ty + STARTOFFY;  // Globaler y-Index 
-   int  z = tz + STARTOFFZ;  // Globaler z-Index 
+   int  x = tx + STARTOFFX;  // Globaler x-Index
+   int  y = ty + STARTOFFY;  // Globaler y-Index
+   int  z = tz + STARTOFFZ;  // Globaler z-Index
 
    k = nx*(ny*z + y) + x;
    nxny = nx*ny;
@@ -1137,63 +1168,63 @@ __global__ void LB_BC_Press_East27( int nx,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////
       ////////////////////////////////////////////////////////////////////////////////
@@ -1312,69 +1343,69 @@ __global__ void LB_BC_Press_East27( int nx,
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                    f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
                         f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = f1_W   -c2o27*drho1;
-      (D.f[DIR_M00   ])[kw   ] = f1_E   -c2o27*drho1;
-      (D.f[DIR_0P0   ])[kn   ] = f1_S   -c2o27*drho1;
-      (D.f[DIR_0M0   ])[ks   ] = f1_N   -c2o27*drho1;
-      (D.f[DIR_00P   ])[kt   ] = f1_B   -c2o27*drho1;
-      (D.f[DIR_00M   ])[kb   ] = f1_T   -c2o27*drho1;
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;
+      (D.f[DIR_P00])[ke   ] = f1_W   -c2o27*drho1;
+      (D.f[DIR_M00])[kw   ] = f1_E   -c2o27*drho1;
+      (D.f[DIR_0P0])[kn   ] = f1_S   -c2o27*drho1;
+      (D.f[DIR_0M0])[ks   ] = f1_N   -c2o27*drho1;
+      (D.f[DIR_00P])[kt   ] = f1_B   -c2o27*drho1;
+      (D.f[DIR_00M])[kb   ] = f1_T   -c2o27*drho1;
+      (D.f[DIR_PP0])[kne  ] = f1_SW  -c1o54*drho1;
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  -c1o54*drho1;
+      (D.f[DIR_PM0])[kse  ] = f1_NW  -c1o54*drho1;
+      (D.f[DIR_MP0])[knw  ] = f1_SE  -c1o54*drho1;
+      (D.f[DIR_P0P])[kte  ] = f1_BW  -c1o54*drho1;
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  -c1o54*drho1;
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  -c1o54*drho1;
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  -c1o54*drho1;
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  -c1o54*drho1;
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  -c1o54*drho1;
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  -c1o54*drho1;
+      (D.f[DIR_0MP])[kts  ] = f1_BN  -c1o54*drho1;
       (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;       
+      (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1;
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1;
+      (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1;
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1;
+      (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1;
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1;
+      (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1;
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1;
    }
    __syncthreads();
-}          
+}
 //////////////////////////////////////////////////////////////////////////////
 
 
@@ -1416,83 +1447,84 @@ __global__ void LB_BC_Press_East27( int nx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDevice27(real* rhoBC,
-                                           real* DD, 
-                                           int* k_Q, 
-                                           real* QQ,
-                                           unsigned int numberOfBCnodes, 
-                                           real om1, 
-                                           unsigned int* neighborX,
-                                           unsigned int* neighborY,
-                                           unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
-                                           bool isEvenTimestep)
+__global__ void QPressDevice27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+   }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -1502,29 +1534,29 @@ __global__ void QPressDevice27(real* rhoBC,
 
    if(k<numberOfBCnodes)
    {
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
+      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
          *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-         *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+         *q_dirBSE, *q_dirBNW;
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1567,46 +1599,46 @@ __global__ void QPressDevice27(real* rhoBC,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real q, vx1, vx2, vx3, drho;
       vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                   ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-                  (f_E - f_W); 
+                  (f_E - f_W);
 
 
       vx2    =   (-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                   ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-                  (f_N - f_S); 
+                  (f_N - f_S);
 
       vx3    =   ((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
                   (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-                  (f_T - f_B); 
+                  (f_T - f_B);
 
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       //////////////////////////////////////////////////////////////////////////
@@ -1616,245 +1648,245 @@ __global__ void QPressDevice27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M00])[kw]=c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-         //(D.f[DIR_P00])[ke]=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq); 
+         (D.f[DIR_M00])[kw]=c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+         //(D.f[DIR_P00])[ke]=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P00])[ke]=c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq); 
-         //(D.f[DIR_M00])[kw]=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
+         (D.f[DIR_P00])[ke]=c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+         //(D.f[DIR_M00])[kw]=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0M0])[ks]=c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-         //(D.f[DIR_0P0])[kn]=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
+         (D.f[DIR_0M0])[ks]=c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+         //(D.f[DIR_0P0])[kn]=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0P0])[kn]=c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-         //(D.f[DIR_0M0])[ks]=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
+         (D.f[DIR_0P0])[kn]=c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+         //(D.f[DIR_0M0])[ks]=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_00M])[kb]=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-         //(D.f[DIR_00P])[kt]=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq); 
+         (D.f[DIR_00M])[kb]=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+         //(D.f[DIR_00P])[kt]=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_00P])[kt]=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq); 
-         //(D.f[DIR_00M])[kb]=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
+         (D.f[DIR_00P])[kt]=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+         //(D.f[DIR_00M])[kb]=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MM0])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-         //(D.f[DIR_PP0])[kne]=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
+         (D.f[DIR_MM0])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+         //(D.f[DIR_PP0])[kne]=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PP0])[kne]=c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-         //(D.f[DIR_MM0])[ksw]=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
+         (D.f[DIR_PP0])[kne]=c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+         //(D.f[DIR_MM0])[ksw]=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MP0])[knw]=c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-         //(D.f[DIR_PM0])[kse]=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
+         (D.f[DIR_MP0])[knw]=c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+         //(D.f[DIR_PM0])[kse]=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PM0])[kse]=c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-         //(D.f[DIR_MP0])[knw]=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
+         (D.f[DIR_PM0])[kse]=c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+         //(D.f[DIR_MP0])[knw]=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M0M])[kbw]=c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-         //(D.f[DIR_P0P])[kte]=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
+         (D.f[DIR_M0M])[kbw]=c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+         //(D.f[DIR_P0P])[kte]=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P0P])[kte]=c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-         //(D.f[DIR_M0M])[kbw]=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
+         (D.f[DIR_P0P])[kte]=c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+         //(D.f[DIR_M0M])[kbw]=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M0P])[ktw]=c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-         //(D.f[DIR_P0M])[kbe]=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
+         (D.f[DIR_M0P])[ktw]=c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+         //(D.f[DIR_P0M])[kbe]=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P0M])[kbe]=c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-         //(D.f[DIR_M0P])[ktw]=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
+         (D.f[DIR_P0M])[kbe]=c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+         //(D.f[DIR_M0P])[ktw]=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0MM])[kbs]=c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-         //(D.f[DIR_0PP])[ktn]=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
+         (D.f[DIR_0MM])[kbs]=c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+         //(D.f[DIR_0PP])[ktn]=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0PP])[ktn]=c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-         //(D.f[DIR_0MM])[kbs]=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
+         (D.f[DIR_0PP])[ktn]=c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+         //(D.f[DIR_0MM])[kbs]=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0MP])[kts]=c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-         //(D.f[DIR_0PM])[kbn]=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
+         (D.f[DIR_0MP])[kts]=c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+         //(D.f[DIR_0PM])[kbn]=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0PM])[kbn]=c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-         //(D.f[DIR_0MP])[kts]=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
+         (D.f[DIR_0PM])[kbn]=c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+         //(D.f[DIR_0MP])[kts]=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MMM])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-         //(D.f[DIR_PPP])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
+         (D.f[DIR_MMM])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+         //(D.f[DIR_PPP])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PPP])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-         //(D.f[DIR_MMM])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
+         (D.f[DIR_PPP])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+         //(D.f[DIR_MMM])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MMP])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-         //(D.f[DIR_PPM])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
+         (D.f[DIR_MMP])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+         //(D.f[DIR_PPM])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PPM])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-         //(D.f[DIR_MMP])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
+         (D.f[DIR_PPM])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+         //(D.f[DIR_MMP])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MPM])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-         //(D.f[DIR_PMP])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
+         (D.f[DIR_MPM])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+         //(D.f[DIR_PMP])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PMP])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-         //(D.f[DIR_MPM])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
+         (D.f[DIR_PMP])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+         //(D.f[DIR_MPM])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MPP])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-         //(D.f[DIR_PMM])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
+         (D.f[DIR_MPP])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+         //(D.f[DIR_PMM])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PMM])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-         //(D.f[DIR_MPP])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
+         (D.f[DIR_PMM])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+         //(D.f[DIR_MPP])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
       }
    }
 }
@@ -1899,86 +1931,87 @@ __global__ void QPressDevice27(real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceAntiBB27(   real* rhoBC,
-												   real* vx,
-												   real* vy,
-												   real* vz,
-												   real* DD, 
-												   int* k_Q, 
-												   real* QQ,
-												   int numberOfBCnodes, 
-												   real om1, 
-												   unsigned int* neighborX,
-												   unsigned int* neighborY,
-												   unsigned int* neighborZ,
-												   unsigned int size_Mat, 
-												   bool isEvenTimestep)
+__global__ void QPressDeviceAntiBB27(
+    real* rhoBC,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+   }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -1988,37 +2021,37 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 
    if(k<numberOfBCnodes)
    {
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
+      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
          *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-         *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   *numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   *numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   *numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   *numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   *numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   *numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  *numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  *numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  *numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  *numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  *numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  *numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  *numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  *numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  *numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  *numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  *numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  *numberOfBCnodes];
-      q_dirTNE = &QQ[DIR_PPP *numberOfBCnodes];
-      q_dirTSW = &QQ[DIR_MMP *numberOfBCnodes];
-      q_dirTSE = &QQ[DIR_PMP *numberOfBCnodes];
-      q_dirTNW = &QQ[DIR_MPP *numberOfBCnodes];
-      q_dirBNE = &QQ[DIR_PPM *numberOfBCnodes];
-      q_dirBSW = &QQ[DIR_MMM *numberOfBCnodes];
-      q_dirBSE = &QQ[DIR_PMM *numberOfBCnodes];
-      q_dirBNW = &QQ[DIR_MPM *numberOfBCnodes];
+         *q_dirBSE, *q_dirBNW;
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2053,123 +2086,123 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       f_ZERO = (D.f[DIR_000])[kzero];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1, vx2, vx3, drho;
       //vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
       //            ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-      //            (f_E - f_W); 
+      //            (f_E - f_W);
 
 
       //vx2    =   (-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
       //            ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-      //            (f_N - f_S); 
+      //            (f_N - f_S);
 
       //vx3    =   ((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
       //            (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-      //            (f_T - f_B); 
+      //            (f_T - f_B);
 
       //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       //////////////////////////////////////////////////////////////////////////
       real drho    = f_ZERO+f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+
-						f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
+                  f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
       drho = drho - rhoBC[k];
-	  drho *= 0.01f;
+     drho *= 0.01f;
       ////////////////////////////////////////////////////////////////////////////////
-	  real q;
+     real q;
       //deltaRho = (rhoBC[k] + one) / (deltaRho + one);
       ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M00])[kw]=f_W-c2o27*drho; 
+         (D.f[DIR_M00])[kw]=f_W-c2o27*drho;
       }
 
       q = q_dirW[k];
@@ -2181,19 +2214,19 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0M0])[ks]=f_S-c2o27*drho; 
+         (D.f[DIR_0M0])[ks]=f_S-c2o27*drho;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0P0])[kn]=f_N-c2o27*drho; 
+         (D.f[DIR_0P0])[kn]=f_N-c2o27*drho;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_00M])[kb]=f_B-c2o27*drho; 
+         (D.f[DIR_00M])[kb]=f_B-c2o27*drho;
       }
 
       q = q_dirB[k];
@@ -2229,13 +2262,13 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M0M])[kbw]=f_BW-c1o54*drho; 
+         (D.f[DIR_M0M])[kbw]=f_BW-c1o54*drho;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P0P])[kte]=f_TE-c1o54*drho; 
+         (D.f[DIR_P0P])[kte]=f_TE-c1o54*drho;
       }
 
       q = q_dirBE[k];
@@ -2364,21 +2397,22 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceFixBackflow27( real* rhoBC,
-                                                      real* DD, 
-                                                      int* k_Q, 
-                                                      int numberOfBCnodes, 
-                                                      real om1, 
-                                                      unsigned int* neighborX,
-                                                      unsigned int* neighborY,
-                                                      unsigned int* neighborZ,
-                                                      unsigned int size_Mat, 
-                                                      bool isEvenTimestep)
+__global__ void QPressDeviceFixBackflow27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // x index: thread index within the block
+   const unsigned  y = blockIdx.x;   // y index: block index along the grid's x dimension
+   const unsigned  z = blockIdx.y;   // z index: block index along the grid's y dimension
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -2426,63 +2460,63 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
          (D.f[DIR_M00])[kw]       = c2o27  * deltaRho;
@@ -2555,21 +2589,22 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceDirDepBot27(  real* rhoBC,
-                                                     real* DD, 
-                                                     int* k_Q, 
-                                                     int numberOfBCnodes, 
-                                                     real om1, 
-                                                     unsigned int* neighborX,
-                                                     unsigned int* neighborY,
-                                                     unsigned int* neighborZ,
-                                                     unsigned int size_Mat, 
-                                                     bool isEvenTimestep)
+__global__ void QPressDeviceDirDepBot27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // x index: thread index within the block
+   const unsigned  y = blockIdx.x;   // y index: block index along the grid's x dimension
+   const unsigned  z = blockIdx.y;   // z index: block index along the grid's y dimension
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -2617,86 +2652,86 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E,f_W,f_N,f_S,f_T,f_NE,f_SW,f_SE,f_NW,f_TE,f_TW,f_TN,f_TS,f_ZERO,f_TNE,f_TSW,f_TSE,f_TNW;//,
             //f_B,f_BW,f_BE,f_BS,f_BN,f_BSW,f_BNE,f_BNW,f_BSE;
 
-      f_E    = (D.f[DIR_P00   ])[ke   ];
-      f_W    = (D.f[DIR_M00   ])[kw   ];
-      f_N    = (D.f[DIR_0P0   ])[kn   ];
-      f_S    = (D.f[DIR_0M0   ])[ks   ];
-      f_T    = (D.f[DIR_00P   ])[kt   ];
-      f_NE   = (D.f[DIR_PP0  ])[kne  ];
-      f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE   = (D.f[DIR_PM0  ])[kse  ];
-      f_NW   = (D.f[DIR_MP0  ])[knw  ];
-      f_TE   = (D.f[DIR_P0P  ])[kte  ];
-      f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f_E    = (D.f[DIR_P00])[ke   ];
+      f_W    = (D.f[DIR_M00])[kw   ];
+      f_N    = (D.f[DIR_0P0])[kn   ];
+      f_S    = (D.f[DIR_0M0])[ks   ];
+      f_T    = (D.f[DIR_00P])[kt   ];
+      f_NE   = (D.f[DIR_PP0])[kne  ];
+      f_SW   = (D.f[DIR_MM0])[ksw  ];
+      f_SE   = (D.f[DIR_PM0])[kse  ];
+      f_NW   = (D.f[DIR_MP0])[knw  ];
+      f_TE   = (D.f[DIR_P0P])[kte  ];
+      f_TW   = (D.f[DIR_M0P])[ktw  ];
+      f_TN   = (D.f[DIR_0PP])[ktn  ];
+      f_TS   = (D.f[DIR_0MP])[kts  ];
       f_ZERO = (D.f[DIR_000])[kzero];
-      f_TNE  = (D.f[DIR_PPP ])[ktne ];
-      f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE  = (D.f[DIR_PMP ])[ktse ];
-      f_TNW  = (D.f[DIR_MPP ])[ktnw ];
+      f_TNE  = (D.f[DIR_PPP])[ktne ];
+      f_TSW  = (D.f[DIR_MMP])[ktsw ];
+      f_TSE  = (D.f[DIR_PMP])[ktse ];
+      f_TNW  = (D.f[DIR_MPP])[ktnw ];
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
       //f_B   = (four*rho- four*f_SW-     eight*f_TSW-four*f_W-   eight*f_TW- four*f_NW-     eight*f_TNW-four*f_S-   eight*f_TS-four*f_ZERO+     f_T-four*f_N-   eight*f_TN- four*f_SE-     eight*f_TSE-four*f_E-   eight*f_TE- four*f_NE-     eight*f_TNE)/nine;
@@ -2793,496 +2828,474 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
 
 
 
-
-
+__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real cs) // Returns the blend of f1[dir] and f[dir] weighted by cs and (c1o1 - cs); compiled for both host and device.
+{
+   return f1[dir] * cs + (c1o1 - cs) * f[dir]; // NOTE(review): c1o1 is defined outside this hunk, presumably the constant 1, which would make this a linear interpolation - confirm
+}
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressNoRhoDevice27(  real* rhoBC,
-												 real* DD, 
-												 int* k_Q, 
-												 int* k_N, 
-												 int numberOfBCnodes, 
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QPressNoRhoDevice27( // Kernel: per thread, overwrites nine distributions at the k_Q node with a blend of the values read at the k_Q and k_N nodes.
+    real* rhoBC, // not referenced in the new kernel body
+    real* distributions, // base array handed to getPointersToDistributions
+    int* k_Q, // per-thread index of the node whose distributions are read into f and overwritten
+    int* k_N, // per-thread index of the second node whose distributions are read into f1
+    int numberOfBCnodes, // threads with nodeIndex >= this value return immediately
+    real om1, // not referenced in the new kernel body
+    unsigned int* neighborX, // index lookup used to derive kw / k1w
+    unsigned int* neighborY, // index lookup used to derive ks, ksw / k1s, k1sw
+    unsigned int* neighborZ, // index lookup used to derive kb, kbw, kbs, kbsw and their k1 counterparts
+    unsigned long long numberOfLBnodes, // forwarded to getPointersToDistributions
+    bool isEvenTimestep, // forwarded to getPointersToDistributions; negated for the write pass
+    int direction) // selects which switch case (MZZ, PZZ, ZMZ, ZPZ, ZZM, ZZP) is written
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
+   //////////////////////////////////////////////////////////////////////////
 
-   const unsigned k = nx*(ny*z + y) + x;
+   if(nodeIndex >= numberOfBCnodes) return; // early exit replaces the former if(k<numberOfBCnodes) wrapper
+
+   ////////////////////////////////////////////////////////////////////////////////
+   //index
+   unsigned int KQK  = k_Q[nodeIndex]; // base index for every k* index below
+   // unsigned int kzero= KQK;
+   unsigned int ke   = KQK;
+   unsigned int kw   = neighborX[KQK];
+   unsigned int kn   = KQK;
+   unsigned int ks   = neighborY[KQK];
+   unsigned int kt   = KQK;
+   unsigned int kb   = neighborZ[KQK];
+   unsigned int ksw  = neighborY[kw]; // chained lookup: neighborY of the neighborX entry
+   unsigned int kne  = KQK;
+   unsigned int kse  = ks;
+   unsigned int knw  = kw;
+   unsigned int kbw  = neighborZ[kw];
+   unsigned int kte  = KQK;
+   unsigned int kbe  = kb;
+   unsigned int ktw  = kw;
+   unsigned int kbs  = neighborZ[ks];
+   unsigned int ktn  = KQK;
+   unsigned int kbn  = kb;
+   unsigned int kts  = ks;
+   unsigned int ktse = ks;
+   unsigned int kbnw = kbw;
+   unsigned int ktnw = kw;
+   unsigned int kbse = kbs;
+   unsigned int ktsw = ksw;
+   unsigned int kbne = kb;
+   unsigned int ktne = KQK;
+   unsigned int kbsw = neighborZ[ksw]; // chained lookup through all three neighbor arrays
+   ////////////////////////////////////////////////////////////////////////////////
+   //index1
+   unsigned int K1QK  = k_N[nodeIndex]; // base index for every k1* index below; same derivation pattern as the k* set
+   //unsigned int k1zero= K1QK;
+   unsigned int k1e   = K1QK;
+   unsigned int k1w   = neighborX[K1QK];
+   unsigned int k1n   = K1QK;
+   unsigned int k1s   = neighborY[K1QK];
+   unsigned int k1t   = K1QK;
+   unsigned int k1b   = neighborZ[K1QK];
+   unsigned int k1sw  = neighborY[k1w];
+   unsigned int k1ne  = K1QK;
+   unsigned int k1se  = k1s;
+   unsigned int k1nw  = k1w;
+   unsigned int k1bw  = neighborZ[k1w];
+   unsigned int k1te  = K1QK;
+   unsigned int k1be  = k1b;
+   unsigned int k1tw  = k1w;
+   unsigned int k1bs  = neighborZ[k1s];
+   unsigned int k1tn  = K1QK;
+   unsigned int k1bn  = k1b;
+   unsigned int k1ts  = k1s;
+   unsigned int k1tse = k1s;
+   unsigned int k1bnw = k1bw;
+   unsigned int k1tnw = k1w;
+   unsigned int k1bse = k1bs;
+   unsigned int k1tsw = k1sw;
+   unsigned int k1bne = k1b;
+   unsigned int k1tne = K1QK;
+   unsigned int k1bsw = neighborZ[k1sw];
+   ////////////////////////////////////////////////////////////////////////////////
+   Distributions27 dist;
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); // NOTE(review): helper not visible here; presumably fills dist.f[] with pointers into distributions for this timestep parity - confirm
+   real f[27], f1[27]; // the DIR_000 entry of both arrays is never assigned (its loads are commented out) and is not read by the switch below
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   f1[DIR_P00] = (dist.f[DIR_P00])[k1e   ]; // f1: distributions gathered at the k_N-derived indices
+   f1[DIR_M00] = (dist.f[DIR_M00])[k1w   ];
+   f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n   ];
+   f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s   ];
+   f1[DIR_00P] = (dist.f[DIR_00P])[k1t   ];
+   f1[DIR_00M] = (dist.f[DIR_00M])[k1b   ];
+   f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne  ];
+   f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw  ];
+   f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se  ];
+   f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw  ];
+   f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te  ];
+   f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw  ];
+   f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be  ];
+   f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw  ];
+   f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn  ];
+   f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs  ];
+   f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn  ];
+   f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts  ];
+   // f1[DIR_000] = (dist.f[DIR_000])[k1zero];
+   f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ];
+   f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ];
+   f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ];
+   f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ];
+   f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ];
+   f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ];
+   f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ];
+   f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ];
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   f[DIR_P00] = (dist.f[DIR_P00])[ke   ]; // f: distributions gathered at the k_Q-derived indices
+   f[DIR_M00] = (dist.f[DIR_M00])[kw   ];
+   f[DIR_0P0] = (dist.f[DIR_0P0])[kn   ];
+   f[DIR_0M0] = (dist.f[DIR_0M0])[ks   ];
+   f[DIR_00P] = (dist.f[DIR_00P])[kt   ];
+   f[DIR_00M] = (dist.f[DIR_00M])[kb   ];
+   f[DIR_PP0] = (dist.f[DIR_PP0])[kne  ];
+   f[DIR_MM0] = (dist.f[DIR_MM0])[ksw  ];
+   f[DIR_PM0] = (dist.f[DIR_PM0])[kse  ];
+   f[DIR_MP0] = (dist.f[DIR_MP0])[knw  ];
+   f[DIR_P0P] = (dist.f[DIR_P0P])[kte  ];
+   f[DIR_M0M] = (dist.f[DIR_M0M])[kbw  ];
+   f[DIR_P0M] = (dist.f[DIR_P0M])[kbe  ];
+   f[DIR_M0P] = (dist.f[DIR_M0P])[ktw  ];
+   f[DIR_0PP] = (dist.f[DIR_0PP])[ktn  ];
+   f[DIR_0MM] = (dist.f[DIR_0MM])[kbs  ];
+   f[DIR_0PM] = (dist.f[DIR_0PM])[kbn  ];
+   f[DIR_0MP] = (dist.f[DIR_0MP])[kts  ];
+   // f[DIR_000] = (dist.f[DIR_000])[kzero];
+   f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ];
+   f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ];
+   f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ];
+   f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ];
+   f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ];
+   f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ];
+   f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ];
+   f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ];
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfBCnodes)
+
+   real cs = c1o1 / sqrtf(c3o1); // blend weight passed to computeOutflowDistribution; NOTE(review): presumably 1/sqrt(3) if c1o1 == 1 and c3o1 == 3 - constants are defined outside this hunk
+
+   //////////////////////////////////////////////////////////////////////////
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); // pointers are re-resolved with the negated parity flag before any write
+   switch(direction) // each listed case writes nine entries at the k_Q-derived indices; any other value writes nothing
    {
-      ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int KQK  = k_Q[k];
-      //unsigned int kzero= KQK;
-      unsigned int ke   = KQK;
-      unsigned int kw   = neighborX[KQK];
-      unsigned int kn   = KQK;
-      unsigned int ks   = neighborY[KQK];
-      unsigned int kt   = KQK;
-      unsigned int kb   = neighborZ[KQK];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = KQK;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = KQK;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = KQK;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = KQK;
-      unsigned int kbsw = neighborZ[ksw];
-      ////////////////////////////////////////////////////////////////////////////////
-      //index1
-      unsigned int K1QK  = k_N[k];
-      //unsigned int k1zero= K1QK;
-      unsigned int k1e   = K1QK;
-      unsigned int k1w   = neighborX[K1QK];
-      unsigned int k1n   = K1QK;
-      unsigned int k1s   = neighborY[K1QK];
-      unsigned int k1t   = K1QK;
-      unsigned int k1b   = neighborZ[K1QK];
-      unsigned int k1sw  = neighborY[k1w];
-      unsigned int k1ne  = K1QK;
-      unsigned int k1se  = k1s;
-      unsigned int k1nw  = k1w;
-      unsigned int k1bw  = neighborZ[k1w];
-      unsigned int k1te  = K1QK;
-      unsigned int k1be  = k1b;
-      unsigned int k1tw  = k1w;
-      unsigned int k1bs  = neighborZ[k1s];
-      unsigned int k1tn  = K1QK;
-      unsigned int k1bn  = k1b;
-      unsigned int k1ts  = k1s;
-      unsigned int k1tse = k1s;
-      unsigned int k1bnw = k1bw;
-      unsigned int k1tnw = k1w;
-      unsigned int k1bse = k1bs;
-      unsigned int k1tsw = k1sw;
-      unsigned int k1bne = k1b;
-      unsigned int k1tne = K1QK;
-      unsigned int k1bsw = neighborZ[k1sw];
-      ////////////////////////////////////////////////////////////////////////////////
-      Distributions27 D;
-      if (isEvenTimestep==true)
-      {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
-      else
-      {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-      }
-      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f1_E    = (D.f[DIR_P00   ])[k1e   ];
-      real f1_W    = (D.f[DIR_M00   ])[k1w   ];
-      real f1_N    = (D.f[DIR_0P0   ])[k1n   ];
-      real f1_S    = (D.f[DIR_0M0   ])[k1s   ];
-      real f1_T    = (D.f[DIR_00P   ])[k1t   ];
-      real f1_B    = (D.f[DIR_00M   ])[k1b   ];
-      real f1_NE   = (D.f[DIR_PP0  ])[k1ne  ];
-      real f1_SW   = (D.f[DIR_MM0  ])[k1sw  ];
-      real f1_SE   = (D.f[DIR_PM0  ])[k1se  ];
-      real f1_NW   = (D.f[DIR_MP0  ])[k1nw  ];
-      real f1_TE   = (D.f[DIR_P0P  ])[k1te  ];
-      real f1_BW   = (D.f[DIR_M0M  ])[k1bw  ];
-      real f1_BE   = (D.f[DIR_P0M  ])[k1be  ];
-      real f1_TW   = (D.f[DIR_M0P  ])[k1tw  ];
-      real f1_TN   = (D.f[DIR_0PP  ])[k1tn  ];
-      real f1_BS   = (D.f[DIR_0MM  ])[k1bs  ];
-      real f1_BN   = (D.f[DIR_0PM  ])[k1bn  ];
-      real f1_TS   = (D.f[DIR_0MP  ])[k1ts  ];
-      //real f1_ZERO = (D.f[DIR_000])[k1zero];
-      real f1_TNE  = (D.f[DIR_PPP ])[k1tne ];
-      real f1_TSW  = (D.f[DIR_MMP ])[k1tsw ];
-      real f1_TSE  = (D.f[DIR_PMP ])[k1tse ];
-      real f1_TNW  = (D.f[DIR_MPP ])[k1tnw ];
-      real f1_BNE  = (D.f[DIR_PPM ])[k1bne ];
-      real f1_BSW  = (D.f[DIR_MMM ])[k1bsw ];
-      real f1_BSE  = (D.f[DIR_PMM ])[k1bse ];
-      real f1_BNW  = (D.f[DIR_MPM ])[k1bnw ];
-      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f_E    = (D.f[DIR_P00   ])[ke   ];
-      real f_W    = (D.f[DIR_M00   ])[kw   ];
-      real f_N    = (D.f[DIR_0P0   ])[kn   ];
-      real f_S    = (D.f[DIR_0M0   ])[ks   ];
-      real f_T    = (D.f[DIR_00P   ])[kt   ];
-      real f_B    = (D.f[DIR_00M   ])[kb   ];
-      real f_NE   = (D.f[DIR_PP0  ])[kne  ];
-      real f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_SE   = (D.f[DIR_PM0  ])[kse  ];
-      real f_NW   = (D.f[DIR_MP0  ])[knw  ];
-      real f_TE   = (D.f[DIR_P0P  ])[kte  ];
-      real f_BW   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_BE   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_BS   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_BN   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_TS   = (D.f[DIR_0MP  ])[kts  ];
-      //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_TNE  = (D.f[DIR_PPP ])[ktne ];
-      real f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      real f_TSE  = (D.f[DIR_PMP ])[ktse ];
-      real f_TNW  = (D.f[DIR_MPP ])[ktnw ];
-      real f_BNE  = (D.f[DIR_PPM ])[kbne ];
-      real f_BSW  = (D.f[DIR_MMM ])[kbsw ];
-      real f_BSE  = (D.f[DIR_PMM ])[kbse ];
-      real f_BNW  = (D.f[DIR_MPM ])[kbnw ];
-      //////////////////////////////////////////////////////////////////////////
+      case MZZ: // writes the nine directions whose first component is P
+         (dist.f[DIR_P00])[ke   ] = computeOutflowDistribution(f, f1, DIR_P00, cs);
+         (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, cs);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, cs);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, cs);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         break;
+
+      case PZZ: // writes the nine directions whose first component is M
+         (dist.f[DIR_M00])[kw   ] = computeOutflowDistribution(f, f1, DIR_M00, cs);
+         (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, cs);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, cs);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, cs);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         break;
+
+      case ZMZ: // writes the nine directions whose second component is P
+         (dist.f[DIR_0P0])[kn   ] = computeOutflowDistribution(f, f1, DIR_0P0, cs);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, cs);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, cs);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, cs);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         break;
+
+      case ZPZ: // writes the nine directions whose second component is M
+         (dist.f[DIR_0M0])[ks   ] = computeOutflowDistribution(f, f1, DIR_0M0, cs);
+         (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, cs);
+         (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, cs);
+         (dist.f[DIR_0MP])[kts  ] = computeOutflowDistribution(f, f1, DIR_0MP, cs);
+         (dist.f[DIR_0MM])[kbs  ] = computeOutflowDistribution(f, f1, DIR_0MM, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
+         break;
+
+      case ZZM: // writes the nine directions whose third component is P
+         (dist.f[DIR_00P])[kt   ] = computeOutflowDistribution(f, f1, DIR_00P, cs);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, cs);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, cs);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, cs);
+         (dist.f[DIR_0MP])[kts  ] = computeOutflowDistribution(f, f1, DIR_0MP, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
+         break;
+
+      case ZZP: // writes the nine directions whose third component is M
+         (dist.f[DIR_00M])[kb   ] = computeOutflowDistribution(f, f1, DIR_00M, cs);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, cs);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, cs);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, cs);
+         (dist.f[DIR_0MM])[kbs  ] = computeOutflowDistribution(f, f1, DIR_0MM, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
+         break;
+      default: // unrecognized direction: leave all distributions untouched
+         break;
+   }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
-      //real vx1, vx2, vx3, drho;
-      //real vx1, vx2, vx3, drho, drho1;
-      //////////////////////////////////////////////////////////////////////////
-	  //Dichte
-    //   drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
-    //             f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-    //             f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); 
-    //   drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-    //             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-    //             f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
-      
-      //////////////////////////////////////////////////////////////////////////
-	  //Ux
 
-	  //vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-   //               ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-   //               (f_E - f_W)) /(one + drho); 
 
 
-   //   vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-   //               ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-   //               (f_N - f_S)) /(one + drho); 
 
-   //   vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
-   //               (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-   //               (f_T - f_B)) /(one + drho); 
 
 
-      //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-   //   //////////////////////////////////////////////////////////////////////////
-	  ////real omega = om1;
-   //   real cusq  = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
-   //   //////////////////////////////////////////////////////////////////////////
-	  ////Tï¿½st MK
-	  ////if(vx1 < zero) vx1 = zero;
-   //   //////////////////////////////////////////////////////////////////////////
-   //   real fZERO = c8over27*  (drho1-(one + drho1)*(cusq))                                                           ;
-   //   real fE    = c2over27*  (drho1+(one + drho1)*(three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq));
-   //   real fW    = c2over27*  (drho1+(one + drho1)*(three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cusq));
-   //   real fN    = c2over27*  (drho1+(one + drho1)*(three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cusq));
-   //   real fS    = c2over27*  (drho1+(one + drho1)*(three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cusq));
-   //   real fT    = c2over27*  (drho1+(one + drho1)*(three*(         vx3)+c9over2*(         vx3)*(         vx3)-cusq));
-   //   real fB    = c2over27*  (drho1+(one + drho1)*(three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cusq));
-   //   real fNE   = c1over54*  (drho1+(one + drho1)*(three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cusq));
-   //   real fSW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cusq));
-   //   real fSE   = c1over54*  (drho1+(one + drho1)*(three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cusq));
-   //   real fNW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cusq));
-   //   real fTE   = c1over54*  (drho1+(one + drho1)*(three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cusq));
-   //   real fBW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cusq));
-   //   real fBE   = c1over54*  (drho1+(one + drho1)*(three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cusq));
-   //   real fTW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq));
-   //   real fTN   = c1over54*  (drho1+(one + drho1)*(three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cusq));
-   //   real fBS   = c1over54*  (drho1+(one + drho1)*(three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cusq));
-   //   real fBN   = c1over54*  (drho1+(one + drho1)*(three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cusq));
-   //   real fTS   = c1over54*  (drho1+(one + drho1)*(three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cusq));
-   //   real fTNE  = c1over216* (drho1+(one + drho1)*(three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq));
-   //   real fBSW  = c1over216* (drho1+(one + drho1)*(three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq));
-   //   real fBNE  = c1over216* (drho1+(one + drho1)*(three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq));
-   //   real fTSW  = c1over216* (drho1+(one + drho1)*(three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq));
-   //   real fTSE  = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq));
-   //   real fBNW  = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq));
-   //   real fBSE  = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
-   //   real fTNW  = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
-
-	  real cs = c1o1 / sqrtf(c3o1);
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //no velocity
-	  //////////////////////////////////////////
-      f_E    = f1_E   * cs + (c1o1 - cs) * f_E   ;
-      f_W    = f1_W   * cs + (c1o1 - cs) * f_W   ;
-      f_N    = f1_N   * cs + (c1o1 - cs) * f_N   ;
-      f_S    = f1_S   * cs + (c1o1 - cs) * f_S   ;
-      f_T    = f1_T   * cs + (c1o1 - cs) * f_T   ;
-      f_B    = f1_B   * cs + (c1o1 - cs) * f_B   ;
-      f_NE   = f1_NE  * cs + (c1o1 - cs) * f_NE  ;
-      f_SW   = f1_SW  * cs + (c1o1 - cs) * f_SW  ;
-      f_SE   = f1_SE  * cs + (c1o1 - cs) * f_SE  ;
-      f_NW   = f1_NW  * cs + (c1o1 - cs) * f_NW  ;
-      f_TE   = f1_TE  * cs + (c1o1 - cs) * f_TE  ;
-      f_BW   = f1_BW  * cs + (c1o1 - cs) * f_BW  ;
-      f_BE   = f1_BE  * cs + (c1o1 - cs) * f_BE  ;
-      f_TW   = f1_TW  * cs + (c1o1 - cs) * f_TW  ;
-      f_TN   = f1_TN  * cs + (c1o1 - cs) * f_TN  ;
-      f_BS   = f1_BS  * cs + (c1o1 - cs) * f_BS  ;
-      f_BN   = f1_BN  * cs + (c1o1 - cs) * f_BN  ;
-      f_TS   = f1_TS  * cs + (c1o1 - cs) * f_TS  ;
-      f_TNE  = f1_TNE * cs + (c1o1 - cs) * f_TNE ;
-      f_TSW  = f1_TSW * cs + (c1o1 - cs) * f_TSW ;
-      f_TSE  = f1_TSE * cs + (c1o1 - cs) * f_TSE ;
-      f_TNW  = f1_TNW * cs + (c1o1 - cs) * f_TNW ;
-      f_BNE  = f1_BNE * cs + (c1o1 - cs) * f_BNE ;
-      f_BSW  = f1_BSW * cs + (c1o1 - cs) * f_BSW ;
-      f_BSE  = f1_BSE * cs + (c1o1 - cs) * f_BSE ;
-      f_BNW  = f1_BNW * cs + (c1o1 - cs) * f_BNW ;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //with velocity
-	  //if(true){//vx1 >= zero){
-		 // real csMvx = one / sqrtf(three) - vx1;
-		 // //real csMvy = one / sqrtf(three) - vx2;
-		 // ///////////////////////////////////////////
-		 // // X
-		 // f_W   = f1_W   * csMvx + (one - csMvx) * f_W   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_NW  = f1_NW  * csMvx + (one - csMvx) * f_NW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_SW  = f1_SW  * csMvx + (one - csMvx) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TW  = f1_TW  * csMvx + (one - csMvx) * f_TW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BW  = f1_BW  * csMvx + (one - csMvx) * f_BW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S;
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE;
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW;
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS;
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS;
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE;
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW;
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE;
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW;
-		 // //////////////////////////////////////////////////////////////////////////
-	  //}
-	  //else
-	  //{
-		 // ///////////////////////////////////////////
-		 // // X
-		 // vx1   = vx1 * 0.9;
-		 // f_W   = f_E   - six * c2over27  * ( vx1        );
-		 // f_NW  = f_SE  - six * c1over54  * ( vx1-vx2    );
-		 // f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // f_TW  = f_BE  - six * c1over54  * ( vx1    -vx3);
-		 // f_BW  = f_TE  - six * c1over54  * ( vx1    +vx3);
-		 // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3);
-		 // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3);
-		 // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //vx2   = vx2 * 0.9;
-		 // //f_S   = f_N   - six * c2over27  * (     vx2    );
-		 // //f_SE  = f_NW  - six * c1over54  * (-vx1+vx2    );
-		 // //f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // //f_TS  = f_BN  - six * c1over54  * (     vx2-vx3);
-		 // //f_BS  = f_TN  - six * c1over54  * (     vx2+vx3);
-		 // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3);
-		 // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3);
-		 // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-	  //}
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-	  //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
-      else
-      {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-      }
-      //////////////////////////////////////////////////////////////////////////
-      //__syncthreads();
-	  // -X
-	  //(D.f[DIR_P00   ])[ke   ] = f_E   ;
-	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-	  //(D.f[DIR_PP0  ])[kne  ] = f_NE  ;
-	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-	  //(D.f[DIR_P0P  ])[kte  ] = f_TE  ;
-	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-	  //(D.f[DIR_PPP ])[ktne ] = f_TNE ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;     
-	  // X
-	  (D.f[DIR_M00   ])[kw   ] = f_W   ;
-	  (D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-	  (D.f[DIR_MP0  ])[knw  ] = f_NW  ;
-	  (D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-	  (D.f[DIR_M0P  ])[ktw  ] = f_TW  ;
-	  (D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-	  (D.f[DIR_MPP ])[ktnw ] = f_TNW ;
-	  (D.f[DIR_MMM ])[kbsw ] = f_BSW ;
-	  (D.f[DIR_MPM ])[kbnw ] = f_BNW ;     
-	  // Y
-	  //(D.f[DIR_0M0   ])[ks   ] = f_S   ;
-	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-	  //(D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-	  //(D.f[DIR_0MP  ])[kts  ] = f_TS  ;
-	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-	  //(D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
-	  // Z
-	  //(D.f[DIR_00M   ])[kb   ] = f_B   ;
-	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-	  //(D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-	  //(D.f[DIR_0PM  ])[kbn  ] = f_BN  ;
-	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
-	  //(D.f[DIR_MPM ])[kbnw ] = f_BNW ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
-      //////////////////////////////////////////////////////////////////////////
-   }
-}
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight)
+{ //! Blends f1[dir] (factor cs) with f[dir] (factor c1o1 - cs) and subtracts weight * rhoCorrection; only entry dir of each array is read.
+   return f1[dir  ] * cs + (c1o1 - cs) * f[dir  ] - weight *rhoCorrection;
+}
+
+__global__ void QPressZeroRhoOutflowDevice27(
+    real* rhoBC,                        //!< not read by this kernel
+    real* distributions,                //!< base array handed to getPointersToDistributions
+    int* k_Q,                           //!< per BC node: index of the boundary node
+    int* k_N,                           //!< per BC node: index of the neighbor node whose values are blended in
+    int numberOfBCnodes,                //!< threads with nodeIndex >= this value return immediately
+    real om1,                           //!< not read by this kernel
+    unsigned int* neighborX,            //!< index lookup used for the *_M00 entries
+    unsigned int* neighborY,            //!< index lookup used for the *_0M0 entries
+    unsigned int* neighborZ,            //!< index lookup used for the *_00M entries
+    unsigned long long numberOfLBnodes, //!< forwarded to getPointersToDistributions
+    bool isEvenTimestep,                //!< used as-is for the reads, negated for the writes
+    int direction,                      //!< selects which case below is applied (MZZ, PZZ, ZMZ, ZPZ, ZZM, ZZP)
+    real densityCorrectionFactor)       //!< multiplies the boundary node density to form rhoCorrection
+{
+   //! Outflow boundary kernel: for the boundary node handled by this thread, overwrite the distributions selected by direction.
+   //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
+   //! - Threads whose index lies beyond the list of boundary nodes have nothing to do.
 
+   if( nodeIndex >= numberOfBCnodes ) return;
 
+   //! - Indices used to address the boundary node's distributions:
+   //!   k_000 comes from k_Q, the remaining ones from chained neighborX/Y/Z lookups.
 
+   uint k_000 = k_Q[nodeIndex];
+   uint k_M00 = neighborX[k_000];
+   uint k_0M0 = neighborY[k_000];
+   uint k_00M = neighborZ[k_000];
+   uint k_MM0 = neighborY[k_M00];
+   uint k_M0M = neighborZ[k_M00];
+   uint k_0MM = neighborZ[k_0M0];
+   uint k_MMM = neighborZ[k_MM0];
 
+   //! - Same index set for the neighbor node k_N[nodeIndex]. Every lookup has to start from kN_000 (not k_000);
+   //!   otherwise fN would just re-read the boundary node's own values for those directions.
+   uint kN_000 = k_N[nodeIndex];
+   uint kN_M00 = neighborX[kN_000];
+   uint kN_0M0 = neighborY[kN_000];
+   uint kN_00M = neighborZ[kN_000];
+   uint kN_MM0 = neighborY[kN_M00];
+   uint kN_M0M = neighborZ[kN_M00];
+   uint kN_0MM = neighborZ[kN_0M0];
+   uint kN_MMM = neighborZ[kN_MM0];
+   //! - Load all 27 distributions of the boundary node (f) and of the neighbor node (fN).
+   Distributions27 dist;
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+   real f[27], fN[27];
+   // distributions of the boundary node
+   f[DIR_000] = (dist.f[DIR_000])[k_000];
+   f[DIR_P00] = (dist.f[DIR_P00])[k_000];
+   f[DIR_M00] = (dist.f[DIR_M00])[k_M00];
+   f[DIR_0P0] = (dist.f[DIR_0P0])[k_000];
+   f[DIR_0M0] = (dist.f[DIR_0M0])[k_0M0];
+   f[DIR_00P] = (dist.f[DIR_00P])[k_000];
+   f[DIR_00M] = (dist.f[DIR_00M])[k_00M];
+   f[DIR_PP0] = (dist.f[DIR_PP0])[k_000];
+   f[DIR_MM0] = (dist.f[DIR_MM0])[k_MM0];
+   f[DIR_PM0] = (dist.f[DIR_PM0])[k_0M0];
+   f[DIR_MP0] = (dist.f[DIR_MP0])[k_M00];
+   f[DIR_P0P] = (dist.f[DIR_P0P])[k_000];
+   f[DIR_M0M] = (dist.f[DIR_M0M])[k_M0M];
+   f[DIR_P0M] = (dist.f[DIR_P0M])[k_00M];
+   f[DIR_M0P] = (dist.f[DIR_M0P])[k_M00];
+   f[DIR_0PP] = (dist.f[DIR_0PP])[k_000];
+   f[DIR_0MM] = (dist.f[DIR_0MM])[k_0MM];
+   f[DIR_0PM] = (dist.f[DIR_0PM])[k_00M];
+   f[DIR_0MP] = (dist.f[DIR_0MP])[k_0M0];
+   f[DIR_PPP] = (dist.f[DIR_PPP])[k_000];
+   f[DIR_MPP] = (dist.f[DIR_MPP])[k_M00];
+   f[DIR_PMP] = (dist.f[DIR_PMP])[k_0M0];
+   f[DIR_MMP] = (dist.f[DIR_MMP])[k_MM0];
+   f[DIR_PPM] = (dist.f[DIR_PPM])[k_00M];
+   f[DIR_MPM] = (dist.f[DIR_MPM])[k_M0M];
+   f[DIR_PMM] = (dist.f[DIR_PMM])[k_0MM];
+   f[DIR_MMM] = (dist.f[DIR_MMM])[k_MMM];
+   // distributions of the neighbor node
+   fN[DIR_000] = (dist.f[DIR_000])[kN_000];
+   fN[DIR_P00] = (dist.f[DIR_P00])[kN_000];
+   fN[DIR_M00] = (dist.f[DIR_M00])[kN_M00];
+   fN[DIR_0P0] = (dist.f[DIR_0P0])[kN_000];
+   fN[DIR_0M0] = (dist.f[DIR_0M0])[kN_0M0];
+   fN[DIR_00P] = (dist.f[DIR_00P])[kN_000];
+   fN[DIR_00M] = (dist.f[DIR_00M])[kN_00M];
+   fN[DIR_PP0] = (dist.f[DIR_PP0])[kN_000];
+   fN[DIR_MM0] = (dist.f[DIR_MM0])[kN_MM0];
+   fN[DIR_PM0] = (dist.f[DIR_PM0])[kN_0M0];
+   fN[DIR_MP0] = (dist.f[DIR_MP0])[kN_M00];
+   fN[DIR_P0P] = (dist.f[DIR_P0P])[kN_000];
+   fN[DIR_M0M] = (dist.f[DIR_M0M])[kN_M0M];
+   fN[DIR_P0M] = (dist.f[DIR_P0M])[kN_00M];
+   fN[DIR_M0P] = (dist.f[DIR_M0P])[kN_M00];
+   fN[DIR_0PP] = (dist.f[DIR_0PP])[kN_000];
+   fN[DIR_0MM] = (dist.f[DIR_0MM])[kN_0MM];
+   fN[DIR_0PM] = (dist.f[DIR_0PM])[kN_00M];
+   fN[DIR_0MP] = (dist.f[DIR_0MP])[kN_0M0];
+   fN[DIR_PPP] = (dist.f[DIR_PPP])[kN_000];
+   fN[DIR_MPP] = (dist.f[DIR_MPP])[kN_M00];
+   fN[DIR_PMP] = (dist.f[DIR_PMP])[kN_0M0];
+   fN[DIR_MMP] = (dist.f[DIR_MMP])[kN_MM0];
+   fN[DIR_PPM] = (dist.f[DIR_PPM])[kN_00M];
+   fN[DIR_MPM] = (dist.f[DIR_MPM])[kN_M0M];
+   fN[DIR_PMM] = (dist.f[DIR_PMM])[kN_0MM];
+   fN[DIR_MMM] = (dist.f[DIR_MMM])[kN_MMM];
+   //! - The boundary node's density, scaled by densityCorrectionFactor, is subtracted (weighted) from every value written below.
+   real drho = vf::lbm::getDensity(f);
 
+   real rhoCorrection = densityCorrectionFactor*drho;
 
+   real cs = c1o1 / sqrtf(c3o1); // NOTE(review): sqrtf is single precision; confirm this is intended if real can be double
 
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); // re-point dist with the negated timestep flag for the writes below
 
+   switch(direction) // each case rewrites the nine directions whose sign on the case's axis is opposite to the case name
+   {
+      case MZZ:
+         (dist.f[DIR_P00])[k_000] = computeOutflowDistribution(f, fN, DIR_P00, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_PM0])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PP0])[k_000] = computeOutflowDistribution(f, fN, DIR_PP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_P0M])[k_00M] = computeOutflowDistribution(f, fN, DIR_P0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_P0P])[k_000] = computeOutflowDistribution(f, fN, DIR_P0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PMP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PPP])[k_000] = computeOutflowDistribution(f, fN, DIR_PPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_PMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216);
+         break;
+
+      case PZZ:
+         (dist.f[DIR_M00])[k_M00] = computeOutflowDistribution(f, fN, DIR_M00, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_MM0])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MP0])[k_M00] = computeOutflowDistribution(f, fN, DIR_MP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0M])[k_M0M] = computeOutflowDistribution(f, fN, DIR_M0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0P])[k_M00] = computeOutflowDistribution(f, fN, DIR_M0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MMP])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMM])[k_MMM] = computeOutflowDistribution(f, fN, DIR_MMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216);
+         break;
+
+      case ZMZ:
+         (dist.f[DIR_0P0])[k_000] = computeOutflowDistribution(f, fN, DIR_0P0, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_PP0])[k_000] = computeOutflowDistribution(f, fN, DIR_PP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MP0])[k_M00] = computeOutflowDistribution(f, fN, DIR_MP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PP])[k_000] = computeOutflowDistribution(f, fN, DIR_0PP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PM])[k_00M] = computeOutflowDistribution(f, fN, DIR_0PM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PPP])[k_000] = computeOutflowDistribution(f, fN, DIR_PPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216);
+         break;
+
+      case ZPZ:
+         (dist.f[DIR_0M0])[k_0M0] = computeOutflowDistribution(f, fN, DIR_0M0, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_PM0])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MM0])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_0MP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_0MM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PMP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMP])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_PMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMM])[k_MMM] = computeOutflowDistribution(f, fN, DIR_MMM, rhoCorrection, cs, c1o216);
+         break;
+
+      case ZZM:
+         (dist.f[DIR_00P])[k_000] = computeOutflowDistribution(f, fN, DIR_00P, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_P0P])[k_000] = computeOutflowDistribution(f, fN, DIR_P0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0P])[k_M00] = computeOutflowDistribution(f, fN, DIR_M0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PP])[k_000] = computeOutflowDistribution(f, fN, DIR_0PP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_0MP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PPP])[k_000] = computeOutflowDistribution(f, fN, DIR_PPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMP])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216);
+         break;
+
+      case ZZP:
+         (dist.f[DIR_00M])[k_00M] = computeOutflowDistribution(f, fN, DIR_00M, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_P0M])[k_00M] = computeOutflowDistribution(f, fN, DIR_P0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0M])[k_M0M] = computeOutflowDistribution(f, fN, DIR_M0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PM])[k_00M] = computeOutflowDistribution(f, fN, DIR_0PM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_0MM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_PMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMM])[k_MMM] = computeOutflowDistribution(f, fN, DIR_MMM, rhoCorrection, cs, c1o216);
+         break;
+      default:
+         break;
+   }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
 
@@ -3314,22 +3327,23 @@ __global__ void QPressNoRhoDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceOld27(real* rhoBC,
-                                             real* DD, 
-                                             int* k_Q, 
-                                             int* k_N, 
-                                             int numberOfBCnodes, 
-                                             real om1, 
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int size_Mat, 
-                                             bool isEvenTimestep)
+__global__ void QPressDeviceOld27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -3403,133 +3417,133 @@ __global__ void QPressDeviceOld27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
                           f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
-	  //drho1 = (drho1 + rhoBC[k])/2.f;
-	  drho1 = drho1 - rhoBC[k];
+     //drho1 = (drho1 + rhoBC[k])/2.f;
+     drho1 = drho1 - rhoBC[k];
       //////////////////////////////////////////////////////////////////////////
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = f1_W   -c2o27*drho1;   //  c1o100;  // zero;  //
-      (D.f[DIR_M00   ])[kw   ] = f1_E   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0P0   ])[kn   ] = f1_S   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0M0   ])[ks   ] = f1_N   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_00P   ])[kt   ] = f1_B   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_00M   ])[kb   ] = f1_T   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P00])[ke   ] = f1_W   -c2o27*drho1;   //  c1o100;  // zero;  //
+      (D.f[DIR_M00])[kw   ] = f1_E   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0P0])[kn   ] = f1_S   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0M0])[ks   ] = f1_N   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_00P])[kt   ] = f1_B   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_00M])[kb   ] = f1_T   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PP0])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PM0])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MP0])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0P])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MP])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
+      (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3573,23 +3587,24 @@ __global__ void QPressDeviceOld27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceEQZ27(real* rhoBC,
-                                             real* DD, 
-                                             int* k_Q, 
-                                             int* k_N,
-											 real* kTestRE,
-                                             int numberOfBCnodes, 
-                                             real om1, 
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int size_Mat, 
-                                             bool isEvenTimestep)
+__global__ void QPressDeviceEQZ27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    real* kTestRE,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -3663,168 +3678,168 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////
     //   Distributions27 kDistTest;
-    //      kDistTest.f[DIR_P00   ] = &kTestRE[DIR_P00   *numberOfBCnodes];
-    //      kDistTest.f[DIR_M00   ] = &kTestRE[DIR_M00   *numberOfBCnodes];
-    //      kDistTest.f[DIR_0P0   ] = &kTestRE[DIR_0P0   *numberOfBCnodes];
-    //      kDistTest.f[DIR_0M0   ] = &kTestRE[DIR_0M0   *numberOfBCnodes];
-    //      kDistTest.f[DIR_00P   ] = &kTestRE[DIR_00P   *numberOfBCnodes];
-    //      kDistTest.f[DIR_00M   ] = &kTestRE[DIR_00M   *numberOfBCnodes];
-    //      kDistTest.f[DIR_PP0  ] = &kTestRE[DIR_PP0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_MM0  ] = &kTestRE[DIR_MM0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_PM0  ] = &kTestRE[DIR_PM0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_MP0  ] = &kTestRE[DIR_MP0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_P0P  ] = &kTestRE[DIR_P0P  *numberOfBCnodes];
-    //      kDistTest.f[DIR_M0M  ] = &kTestRE[DIR_M0M  *numberOfBCnodes];
-    //      kDistTest.f[DIR_P0M  ] = &kTestRE[DIR_P0M  *numberOfBCnodes];
-    //      kDistTest.f[DIR_M0P  ] = &kTestRE[DIR_M0P  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0PP  ] = &kTestRE[DIR_0PP  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0MM  ] = &kTestRE[DIR_0MM  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0PM  ] = &kTestRE[DIR_0PM  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0MP  ] = &kTestRE[DIR_0MP  *numberOfBCnodes];
-    //      kDistTest.f[DIR_000] = &kTestRE[DIR_000*numberOfBCnodes];
-    //      kDistTest.f[DIR_PPP ] = &kTestRE[DIR_PPP *numberOfBCnodes];
-    //      kDistTest.f[DIR_MMP ] = &kTestRE[DIR_MMP *numberOfBCnodes];
-    //      kDistTest.f[DIR_PMP ] = &kTestRE[DIR_PMP *numberOfBCnodes];
-    //      kDistTest.f[DIR_MPP ] = &kTestRE[DIR_MPP *numberOfBCnodes];
-    //      kDistTest.f[DIR_PPM ] = &kTestRE[DIR_PPM *numberOfBCnodes];
-    //      kDistTest.f[DIR_MMM ] = &kTestRE[DIR_MMM *numberOfBCnodes];
-    //      kDistTest.f[DIR_PMM ] = &kTestRE[DIR_PMM *numberOfBCnodes];
-    //      kDistTest.f[DIR_MPM ] = &kTestRE[DIR_MPM *numberOfBCnodes];
+    //      kDistTest.f[DIR_P00] = &kTestRE[DIR_P00 * numberOfBCnodes];
+    //      kDistTest.f[DIR_M00] = &kTestRE[DIR_M00 * numberOfBCnodes];
+    //      kDistTest.f[DIR_0P0] = &kTestRE[DIR_0P0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_0M0] = &kTestRE[DIR_0M0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_00P] = &kTestRE[DIR_00P * numberOfBCnodes];
+    //      kDistTest.f[DIR_00M] = &kTestRE[DIR_00M * numberOfBCnodes];
+    //      kDistTest.f[DIR_PP0] = &kTestRE[DIR_PP0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_MM0] = &kTestRE[DIR_MM0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_PM0] = &kTestRE[DIR_PM0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_MP0] = &kTestRE[DIR_MP0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_P0P] = &kTestRE[DIR_P0P * numberOfBCnodes];
+    //      kDistTest.f[DIR_M0M] = &kTestRE[DIR_M0M * numberOfBCnodes];
+    //      kDistTest.f[DIR_P0M] = &kTestRE[DIR_P0M * numberOfBCnodes];
+    //      kDistTest.f[DIR_M0P] = &kTestRE[DIR_M0P * numberOfBCnodes];
+    //      kDistTest.f[DIR_0PP] = &kTestRE[DIR_0PP * numberOfBCnodes];
+    //      kDistTest.f[DIR_0MM] = &kTestRE[DIR_0MM * numberOfBCnodes];
+    //      kDistTest.f[DIR_0PM] = &kTestRE[DIR_0PM * numberOfBCnodes];
+    //      kDistTest.f[DIR_0MP] = &kTestRE[DIR_0MP * numberOfBCnodes];
+    //      kDistTest.f[DIR_000] = &kTestRE[DIR_000 * numberOfBCnodes];
+    //      kDistTest.f[DIR_PPP] = &kTestRE[DIR_PPP * numberOfBCnodes];
+    //      kDistTest.f[DIR_MMP] = &kTestRE[DIR_MMP * numberOfBCnodes];
+    //      kDistTest.f[DIR_PMP] = &kTestRE[DIR_PMP * numberOfBCnodes];
+    //      kDistTest.f[DIR_MPP] = &kTestRE[DIR_MPP * numberOfBCnodes];
+    //      kDistTest.f[DIR_PPM] = &kTestRE[DIR_PPM * numberOfBCnodes];
+    //      kDistTest.f[DIR_MMM] = &kTestRE[DIR_MMM * numberOfBCnodes];
+    //      kDistTest.f[DIR_PMM] = &kTestRE[DIR_PMM * numberOfBCnodes];
+    //      kDistTest.f[DIR_MPM] = &kTestRE[DIR_MPM * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   //real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
-   //   //f1_W    = (D.f[DIR_P00   ])[k1e   ];
-   //   //f1_E    = (D.f[DIR_M00   ])[k1w   ];
-   //   //f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-   //   //f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-   //   //f1_B    = (D.f[DIR_00P   ])[k1t   ];
-   //   //f1_T    = (D.f[DIR_00M   ])[k1b   ];
-   //   //f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-   //   //f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-   //   //f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-   //   //f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-   //   //f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-   //   //f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-   //   //f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-   //   //f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-   //   //f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-   //   //f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-   //   //f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-   //   //f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+   //   //f1_W    = (D.f[DIR_P00])[k1e   ];
+   //   //f1_E    = (D.f[DIR_M00])[k1w   ];
+   //   //f1_S    = (D.f[DIR_0P0])[k1n   ];
+   //   //f1_N    = (D.f[DIR_0M0])[k1s   ];
+   //   //f1_B    = (D.f[DIR_00P])[k1t   ];
+   //   //f1_T    = (D.f[DIR_00M])[k1b   ];
+   //   //f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+   //   //f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+   //   //f1_NW   = (D.f[DIR_PM0])[k1se  ];
+   //   //f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+   //   //f1_BW   = (D.f[DIR_P0P])[k1te  ];
+   //   //f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+   //   //f1_TW   = (D.f[DIR_P0M])[k1be  ];
+   //   //f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+   //   //f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+   //   //f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+   //   //f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+   //   //f1_BN   = (D.f[DIR_0MP])[k1ts  ];
    //   //f1_ZERO = (D.f[DIR_000])[k1zero];
-   //   //f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-   //   //f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-   //   //f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-   //   //f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-   //   //f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-   //   //f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-   //   //f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-   //   //f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+   //   //f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+   //   //f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+   //   //f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+   //   //f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+   //   //f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+   //   //f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+   //   //f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+   //   //f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
-   //   f1_E    = (D.f[DIR_P00   ])[k1e   ];
-   //   f1_W    = (D.f[DIR_M00   ])[k1w   ];
-   //   f1_N    = (D.f[DIR_0P0   ])[k1n   ];
-   //   f1_S    = (D.f[DIR_0M0   ])[k1s   ];
-   //   f1_T    = (D.f[DIR_00P   ])[k1t   ];
-   //   f1_B    = (D.f[DIR_00M   ])[k1b   ];
-   //   f1_NE   = (D.f[DIR_PP0  ])[k1ne  ];
-   //   f1_SW   = (D.f[DIR_MM0  ])[k1sw  ];
-   //   f1_SE   = (D.f[DIR_PM0  ])[k1se  ];
-   //   f1_NW   = (D.f[DIR_MP0  ])[k1nw  ];
-   //   f1_TE   = (D.f[DIR_P0P  ])[k1te  ];
-   //   f1_BW   = (D.f[DIR_M0M  ])[k1bw  ];
-   //   f1_BE   = (D.f[DIR_P0M  ])[k1be  ];
-   //   f1_TW   = (D.f[DIR_M0P  ])[k1tw  ];
-   //   f1_TN   = (D.f[DIR_0PP  ])[k1tn  ];
-   //   f1_BS   = (D.f[DIR_0MM  ])[k1bs  ];
-   //   f1_BN   = (D.f[DIR_0PM  ])[k1bn  ];
-   //   f1_TS   = (D.f[DIR_0MP  ])[k1ts  ];
+   //   f1_E    = (D.f[DIR_P00])[k1e   ];
+   //   f1_W    = (D.f[DIR_M00])[k1w   ];
+   //   f1_N    = (D.f[DIR_0P0])[k1n   ];
+   //   f1_S    = (D.f[DIR_0M0])[k1s   ];
+   //   f1_T    = (D.f[DIR_00P])[k1t   ];
+   //   f1_B    = (D.f[DIR_00M])[k1b   ];
+   //   f1_NE   = (D.f[DIR_PP0])[k1ne  ];
+   //   f1_SW   = (D.f[DIR_MM0])[k1sw  ];
+   //   f1_SE   = (D.f[DIR_PM0])[k1se  ];
+   //   f1_NW   = (D.f[DIR_MP0])[k1nw  ];
+   //   f1_TE   = (D.f[DIR_P0P])[k1te  ];
+   //   f1_BW   = (D.f[DIR_M0M])[k1bw  ];
+   //   f1_BE   = (D.f[DIR_P0M])[k1be  ];
+   //   f1_TW   = (D.f[DIR_M0P])[k1tw  ];
+   //   f1_TN   = (D.f[DIR_0PP])[k1tn  ];
+   //   f1_BS   = (D.f[DIR_0MM])[k1bs  ];
+   //   f1_BN   = (D.f[DIR_0PM])[k1bn  ];
+   //   f1_TS   = (D.f[DIR_0MP])[k1ts  ];
    //   f1_ZERO = (D.f[DIR_000])[k1zero];
-   //   f1_TNE  = (D.f[DIR_PPP ])[k1tne ];
-   //   f1_TSW  = (D.f[DIR_MMP ])[k1tsw ];
-   //   f1_TSE  = (D.f[DIR_PMP ])[k1tse ];
-   //   f1_TNW  = (D.f[DIR_MPP ])[k1tnw ];
-   //   f1_BNE  = (D.f[DIR_PPM ])[k1bne ];
-   //   f1_BSW  = (D.f[DIR_MMM ])[k1bsw ];
-   //   f1_BSE  = (D.f[DIR_PMM ])[k1bse ];
-   //   f1_BNW  = (D.f[DIR_MPM ])[k1bnw ];
+   //   f1_TNE  = (D.f[DIR_PPP])[k1tne ];
+   //   f1_TSW  = (D.f[DIR_MMP])[k1tsw ];
+   //   f1_TSE  = (D.f[DIR_PMP])[k1tse ];
+   //   f1_TNW  = (D.f[DIR_MPP])[k1tnw ];
+   //   f1_BNE  = (D.f[DIR_PPM])[k1bne ];
+   //   f1_BSW  = (D.f[DIR_MMM])[k1bsw ];
+   //   f1_BSE  = (D.f[DIR_PMM])[k1bse ];
+   //   f1_BNW  = (D.f[DIR_MPM])[k1bnw ];
    //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    //   //////////////////////////////////////////////////////////////////////////
    //   real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
-	  //real vx1      = (((f1_TNE-f1_BSW)+(f1_BSE-f1_TNW)+(f1_BNE-f1_TSW)+(f1_TSE-f1_BNW)) + (((f1_NE-f1_SW)+(f1_TE-f1_BW))+((f1_SE-f1_NW)+(f1_BE-f1_TW))) + (f1_E-f1_W)) / (one + drho1);
-	  //real vx2      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_BNE-f1_TSW)+(f1_BNW-f1_TSE)) + (((f1_NE-f1_SW)+(f1_TN-f1_BS))+((f1_BN-f1_TS)+(f1_NW-f1_SE))) + (f1_N-f1_S)) / (one + drho1);
-	  //real vx3      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_TSW-f1_BNE)+(f1_TSE-f1_BNW)) + (((f1_TE-f1_BW)+(f1_TN-f1_BS))+((f1_TW-f1_BE)+(f1_TS-f1_BN))) + (f1_T-f1_B)) / (one + drho1);
+     //real vx1      = (((f1_TNE-f1_BSW)+(f1_BSE-f1_TNW)+(f1_BNE-f1_TSW)+(f1_TSE-f1_BNW)) + (((f1_NE-f1_SW)+(f1_TE-f1_BW))+((f1_SE-f1_NW)+(f1_BE-f1_TW))) + (f1_E-f1_W)) / (one + drho1);
+     //real vx2      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_BNE-f1_TSW)+(f1_BNW-f1_TSE)) + (((f1_NE-f1_SW)+(f1_TN-f1_BS))+((f1_BN-f1_TS)+(f1_NW-f1_SE))) + (f1_N-f1_S)) / (one + drho1);
+     //real vx3      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_TSW-f1_BNE)+(f1_TSE-f1_BNW)) + (((f1_TE-f1_BW)+(f1_TN-f1_BS))+((f1_TW-f1_BE)+(f1_TS-f1_BN))) + (f1_T-f1_B)) / (one + drho1);
    //   //////////////////////////////////////////////////////////////////////////
-	  ////real omega = om1;
+     ////real omega = om1;
    //   real cusq  = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
    //   //////////////////////////////////////////////////////////////////////////
-	  ////Tï¿½st MK
-	  ////if(vx1 < zero) vx1 = zero;
+     ////Test MK
+     ////if(vx1 < zero) vx1 = zero;
    //   //////////////////////////////////////////////////////////////////////////
-	  ////becomes higher with neighbor source and lower with local source
+     ////becomes higher with neighbor source and lower with local source
    //   //real fZERO = c8over27*  (rhoBC[k]-(one + rhoBC[k])*(cusq))                                                           ;
    //   //real fE    = c2over27*  (rhoBC[k]+(one + rhoBC[k])*(three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq));
    //   //real fW    = c2over27*  (rhoBC[k]+(one + rhoBC[k])*(three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cusq));
@@ -3853,7 +3868,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
    //   //real fBSE  = c1over216* (rhoBC[k]+(one + rhoBC[k])*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
    //   //real fTNW  = c1over216* (rhoBC[k]+(one + rhoBC[k])*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
    //   //////////////////////////////////////////////////////////////////////////
-	  //// based on VirtualFluids (kucher + fard)
+     //// based on VirtualFluids (kucher + fard)
    //   real fZERO = c8over27  * rhoBC[k] * (one                                                                      - cusq);
    //   real fE    = c2over27  * rhoBC[k] * (one + three * ( vx1        ) + c9over2 * ( vx1        ) * ( vx1        ) - cusq);
    //   real fW    = c2over27  * rhoBC[k] * (one + three * (-vx1        ) + c9over2 * (-vx1        ) * (-vx1        ) - cusq);
@@ -3882,7 +3897,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
    //   real fBSE  = c1over216 * rhoBC[k] * (one + three * ( vx1-vx2-vx3) + c9over2 * ( vx1-vx2-vx3) * ( vx1-vx2-vx3) - cusq);
    //   real fTNW  = c1over216 * rhoBC[k] * (one + three * (-vx1+vx2+vx3) + c9over2 * (-vx1+vx2+vx3) * (-vx1+vx2+vx3) - cusq);
    ////   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //////test
+     //////test
    ////   real fZERO = c8over27  * ((drho1 + rhoBC[k]) / two) * (one                                                                      - cusq);
    ////   real fE    = c2over27  * ((drho1 + rhoBC[k]) / two) * (one + three * ( vx1        ) + c9over2 * ( vx1        ) * ( vx1        ) - cusq);
    ////   real fW    = c2over27  * ((drho1 + rhoBC[k]) / two) * (one + three * (-vx1        ) + c9over2 * (-vx1        ) * (-vx1        ) - cusq);
@@ -3911,190 +3926,190 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
    ////   real fBSE  = c1over216 * ((drho1 + rhoBC[k]) / two) * (one + three * ( vx1-vx2-vx3) + c9over2 * ( vx1-vx2-vx3) * ( vx1-vx2-vx3) - cusq);
    ////   real fTNW  = c1over216 * ((drho1 + rhoBC[k]) / two) * (one + three * (-vx1+vx2+vx3) + c9over2 * (-vx1+vx2+vx3) * (-vx1+vx2+vx3) - cusq);
 
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+         //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // based on BGK Plus Comp
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			//double mfabb = (D.f[DIR_P00   ])[k1e   ];
-			//double mfcbb = (D.f[DIR_M00   ])[k1w   ];
-			//double mfbab = (D.f[DIR_0P0   ])[k1n   ];
-			//double mfbcb = (D.f[DIR_0M0   ])[k1s   ];
-			//double mfbba = (D.f[DIR_00P   ])[k1t   ];
-			//double mfbbc = (D.f[DIR_00M   ])[k1b   ];
-			//double mfaab = (D.f[DIR_PP0  ])[k1ne  ];
-			//double mfccb = (D.f[DIR_MM0  ])[k1sw  ];
-			//double mfacb = (D.f[DIR_PM0  ])[k1se  ];
-			//double mfcab = (D.f[DIR_MP0  ])[k1nw  ];
-			//double mfaba = (D.f[DIR_P0P  ])[k1te  ];
-			//double mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
-			//double mfabc = (D.f[DIR_P0M  ])[k1be  ];
-			//double mfcba = (D.f[DIR_M0P  ])[k1tw  ];
-			//double mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
-			//double mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
-			//double mfbac = (D.f[DIR_0PM  ])[k1bn  ];
-			//double mfbca = (D.f[DIR_0MP  ])[k1ts  ];
-			//double mfbbb = (D.f[DIR_000])[k1zero];
-			//double mfaaa = (D.f[DIR_PPP ])[k1tne ];
-			//double mfcca = (D.f[DIR_MMP ])[k1tsw ];
-			//double mfaca = (D.f[DIR_PMP ])[k1tse ];
-			//double mfcaa = (D.f[DIR_MPP ])[k1tnw ];
-			//double mfaac = (D.f[DIR_PPM ])[k1bne ];
-			//double mfccc = (D.f[DIR_MMM ])[k1bsw ];
-			//double mfacc = (D.f[DIR_PMM ])[k1bse ];
-			//double mfcac = (D.f[DIR_MPM ])[k1bnw ];
-			real mfabb = (D.f[DIR_P00   ])[k1e   ];
-			real mfcbb = (D.f[DIR_M00   ])[k1w   ];
-			real mfbab = (D.f[DIR_0P0   ])[k1n   ];
-			real mfbcb = (D.f[DIR_0M0   ])[k1s   ];
-			real mfbba = (D.f[DIR_00P   ])[k1t   ];
-			real mfbbc = (D.f[DIR_00M   ])[k1b   ];
-			real mfaab = (D.f[DIR_PP0  ])[k1ne  ];
-			real mfccb = (D.f[DIR_MM0  ])[k1sw  ];
-			real mfacb = (D.f[DIR_PM0  ])[k1se  ];
-			real mfcab = (D.f[DIR_MP0  ])[k1nw  ];
-			real mfaba = (D.f[DIR_P0P  ])[k1te  ];
-			real mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
-			real mfabc = (D.f[DIR_P0M  ])[k1be  ];
-			real mfcba = (D.f[DIR_M0P  ])[k1tw  ];
-			real mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
-			real mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
-			real mfbac = (D.f[DIR_0PM  ])[k1bn  ];
-			real mfbca = (D.f[DIR_0MP  ])[k1ts  ];
-			real mfbbb = (D.f[DIR_000])[k1zero];
-			real mfaaa = (D.f[DIR_PPP ])[k1tne ];
-			real mfcca = (D.f[DIR_MMP ])[k1tsw ];
-			real mfaca = (D.f[DIR_PMP ])[k1tse ];
-			real mfcaa = (D.f[DIR_MPP ])[k1tnw ];
-			real mfaac = (D.f[DIR_PPM ])[k1bne ];
-			real mfccc = (D.f[DIR_MMM ])[k1bsw ];
-			real mfacc = (D.f[DIR_PMM ])[k1bse ];
-			real mfcac = (D.f[DIR_MPM ])[k1bnw ];
-
-			//real mfcbb = (D.f[DIR_P00   ])[ke   ];
-			//real mfabb = (D.f[DIR_M00   ])[kw   ];
-			//real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-			//real mfbab = (D.f[DIR_0M0   ])[ks   ];
-			//real mfbbc = (D.f[DIR_00P   ])[kt   ];
-			//real mfbba = (D.f[DIR_00M   ])[kb   ];
-			//real mfccb = (D.f[DIR_PP0  ])[kne  ];
-			//real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-			//real mfcab = (D.f[DIR_PM0  ])[kse  ];
-			//real mfacb = (D.f[DIR_MP0  ])[knw  ];
-			//real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-			//real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-			//real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-			//real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-			//real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-			//real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-			//real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-			//real mfbac = (D.f[DIR_0MP  ])[kts  ];
-			//real mfbbb = (D.f[DIR_000])[kzero];
-			//real mfccc = (D.f[DIR_PPP ])[ktne ];
-			//real mfaac = (D.f[DIR_MMP ])[ktsw ];
-			//real mfcac = (D.f[DIR_PMP ])[ktse ];
-			//real mfacc = (D.f[DIR_MPP ])[ktnw ];
-			//real mfcca = (D.f[DIR_PPM ])[kbne ];
-			//real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-			//real mfcaa = (D.f[DIR_PMM ])[kbse ];
-			//real mfaca = (D.f[DIR_MPM ])[kbnw ];
-			////////////////////////////////////////////////////////////////////////////////////
-			//real rho   = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
-			//				(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
-			//				((mfabb+mfcbb) + (mfbab+mfbcb)) + (mfbba+mfbbc)) + mfbbb) + one;//!!!!Achtung + one
-			////////////////////////////////////////////////////////////////////////////////////
-			real rho = rhoBC[k];
-			////////////////////////////////////////////////////////////////////////////////////
-			real OoRho = c1o1 / (rho * 1.5f);
-			////////////////////////////////////////////////////////////////////////////////////
-			real vvx    = ((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
-						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
-						       (mfcbb-mfabb)) * OoRho;
-			real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) + 
-				             (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
-				               (mfbcb-mfbab)) * OoRho;
-			real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) + 
-				             (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
-				               (mfbbc-mfbba)) * OoRho;
-			/////////////////////////
-			//Test Values
-			//double vvx    = 0.016;
-			//double vvy    = zero;
-			//double vvz    = zero;
-			////////////////////////////////////////////////////////////////////////////////////////
-			////round off error test
-			//if(vvx!=zero){
-			//	(kDistTest.f[DIR_P00   ])[k] = mfabb;
-			//	(kDistTest.f[DIR_M00   ])[k] = mfcbb;
-			//	(kDistTest.f[DIR_0P0   ])[k] = mfbab;
-			//	(kDistTest.f[DIR_0M0   ])[k] = mfbcb;
-			//	(kDistTest.f[DIR_00P   ])[k] = mfbba;
-			//	(kDistTest.f[DIR_00M   ])[k] = mfbbc;
-			//	(kDistTest.f[DIR_PP0  ])[k] = mfaab;
-			//	(kDistTest.f[DIR_MM0  ])[k] = mfccb;
-			//	(kDistTest.f[DIR_PM0  ])[k] = mfacb;
-			//	(kDistTest.f[DIR_MP0  ])[k] = mfcab;
-			//	(kDistTest.f[DIR_P0P  ])[k] = mfaba;
-			//	(kDistTest.f[DIR_M0M  ])[k] = mfcbc;
-			//	(kDistTest.f[DIR_P0M  ])[k] = mfabc;
-			//	(kDistTest.f[DIR_M0P  ])[k] = mfcba;
-			//	(kDistTest.f[DIR_0PP  ])[k] = mfbaa;
-			//	(kDistTest.f[DIR_0MM  ])[k] = mfbcc;
-			//	(kDistTest.f[DIR_0PM  ])[k] = mfbac;
-			//	(kDistTest.f[DIR_0MP  ])[k] = mfbca;
-			//	(kDistTest.f[DIR_000])[k] = KQK;
-			//	(kDistTest.f[DIR_PPP ])[k] = mfaaa;
-			//	(kDistTest.f[DIR_MMP ])[k] = mfcca;
-			//	(kDistTest.f[DIR_PMP ])[k] = mfaca;
-			//	(kDistTest.f[DIR_MPP ])[k] = mfcaa;
-			//	(kDistTest.f[DIR_PPM ])[k] = mfaac;
-			//	(kDistTest.f[DIR_MMM ])[k] = mfccc;
-			//	(kDistTest.f[DIR_PMM ])[k] = mfacc;
-			//	(kDistTest.f[DIR_MPM ])[k] = mfcac;
-			//}else{
-			//	(kDistTest.f[DIR_P00   ])[k] = zero;
-			//	(kDistTest.f[DIR_M00   ])[k] = zero;
-			//	(kDistTest.f[DIR_0P0   ])[k] = zero;
-			//	(kDistTest.f[DIR_0M0   ])[k] = zero;
-			//	(kDistTest.f[DIR_00P   ])[k] = zero;
-			//	(kDistTest.f[DIR_00M   ])[k] = zero;
-			//	(kDistTest.f[DIR_PP0  ])[k] = zero;
-			//	(kDistTest.f[DIR_MM0  ])[k] = zero;
-			//	(kDistTest.f[DIR_PM0  ])[k] = zero;
-			//	(kDistTest.f[DIR_MP0  ])[k] = zero;
-			//	(kDistTest.f[DIR_P0P  ])[k] = zero;
-			//	(kDistTest.f[DIR_M0M  ])[k] = zero;
-			//	(kDistTest.f[DIR_P0M  ])[k] = zero;
-			//	(kDistTest.f[DIR_M0P  ])[k] = zero;
-			//	(kDistTest.f[DIR_0PP  ])[k] = zero;
-			//	(kDistTest.f[DIR_0MM  ])[k] = zero;
-			//	(kDistTest.f[DIR_0PM  ])[k] = zero;
-			//	(kDistTest.f[DIR_0MP  ])[k] = zero;
-			//	(kDistTest.f[DIR_000])[k] = zero;
-			//	(kDistTest.f[DIR_PPP ])[k] = zero;
-			//	(kDistTest.f[DIR_MMP ])[k] = zero;
-			//	(kDistTest.f[DIR_PMP ])[k] = zero;
-			//	(kDistTest.f[DIR_MPP ])[k] = zero;
-			//	(kDistTest.f[DIR_PPM ])[k] = zero;
-			//	(kDistTest.f[DIR_MMM ])[k] = zero;
-			//	(kDistTest.f[DIR_PMM ])[k] = zero;
-			//	(kDistTest.f[DIR_MPM ])[k] = zero;
-			//}
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// first bad fix for negative x velocity
-			////if(vvx > zero) vvx = zero;
-			//////////////////////////////////////////////////////////////////////////////////////
-			////// second bad fix for negative x velocity
-			////if(vvx > zero){
-			////	vvx = -vvx;
-			////	vvy = -vvy;
-			////	vvz = -vvz;
-			////}
-			////////////////////////////////////////////////////////////////////////////////////
-			double vx2    = vvx * vvx;
-			double vy2    = vvy * vvy;
-			double vz2    = vvz * vvz;
-			//////////////////////////////////////////////////////////////////////////////////
-			//original
+         //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+         //double mfabb = (D.f[DIR_P00])[k1e   ];
+         //double mfcbb = (D.f[DIR_M00])[k1w   ];
+         //double mfbab = (D.f[DIR_0P0])[k1n   ];
+         //double mfbcb = (D.f[DIR_0M0])[k1s   ];
+         //double mfbba = (D.f[DIR_00P])[k1t   ];
+         //double mfbbc = (D.f[DIR_00M])[k1b   ];
+         //double mfaab = (D.f[DIR_PP0])[k1ne  ];
+         //double mfccb = (D.f[DIR_MM0])[k1sw  ];
+         //double mfacb = (D.f[DIR_PM0])[k1se  ];
+         //double mfcab = (D.f[DIR_MP0])[k1nw  ];
+         //double mfaba = (D.f[DIR_P0P])[k1te  ];
+         //double mfcbc = (D.f[DIR_M0M])[k1bw  ];
+         //double mfabc = (D.f[DIR_P0M])[k1be  ];
+         //double mfcba = (D.f[DIR_M0P])[k1tw  ];
+         //double mfbaa = (D.f[DIR_0PP])[k1tn  ];
+         //double mfbcc = (D.f[DIR_0MM])[k1bs  ];
+         //double mfbac = (D.f[DIR_0PM])[k1bn  ];
+         //double mfbca = (D.f[DIR_0MP])[k1ts  ];
+         //double mfbbb = (D.f[DIR_000])[k1zero];
+         //double mfaaa = (D.f[DIR_PPP])[k1tne ];
+         //double mfcca = (D.f[DIR_MMP])[k1tsw ];
+         //double mfaca = (D.f[DIR_PMP])[k1tse ];
+         //double mfcaa = (D.f[DIR_MPP])[k1tnw ];
+         //double mfaac = (D.f[DIR_PPM])[k1bne ];
+         //double mfccc = (D.f[DIR_MMM])[k1bsw ];
+         //double mfacc = (D.f[DIR_PMM])[k1bse ];
+         //double mfcac = (D.f[DIR_MPM])[k1bnw ];
+         real mfabb = (D.f[DIR_P00])[k1e   ];
+         real mfcbb = (D.f[DIR_M00])[k1w   ];
+         real mfbab = (D.f[DIR_0P0])[k1n   ];
+         real mfbcb = (D.f[DIR_0M0])[k1s   ];
+         real mfbba = (D.f[DIR_00P])[k1t   ];
+         real mfbbc = (D.f[DIR_00M])[k1b   ];
+         real mfaab = (D.f[DIR_PP0])[k1ne  ];
+         real mfccb = (D.f[DIR_MM0])[k1sw  ];
+         real mfacb = (D.f[DIR_PM0])[k1se  ];
+         real mfcab = (D.f[DIR_MP0])[k1nw  ];
+         real mfaba = (D.f[DIR_P0P])[k1te  ];
+         real mfcbc = (D.f[DIR_M0M])[k1bw  ];
+         real mfabc = (D.f[DIR_P0M])[k1be  ];
+         real mfcba = (D.f[DIR_M0P])[k1tw  ];
+         real mfbaa = (D.f[DIR_0PP])[k1tn  ];
+         real mfbcc = (D.f[DIR_0MM])[k1bs  ];
+         real mfbac = (D.f[DIR_0PM])[k1bn  ];
+         real mfbca = (D.f[DIR_0MP])[k1ts  ];
+         real mfbbb = (D.f[DIR_000])[k1zero];
+         real mfaaa = (D.f[DIR_PPP])[k1tne ];
+         real mfcca = (D.f[DIR_MMP])[k1tsw ];
+         real mfaca = (D.f[DIR_PMP])[k1tse ];
+         real mfcaa = (D.f[DIR_MPP])[k1tnw ];
+         real mfaac = (D.f[DIR_PPM])[k1bne ];
+         real mfccc = (D.f[DIR_MMM])[k1bsw ];
+         real mfacc = (D.f[DIR_PMM])[k1bse ];
+         real mfcac = (D.f[DIR_MPM])[k1bnw ];
+
+         //real mfcbb = (D.f[DIR_P00])[ke   ];
+         //real mfabb = (D.f[DIR_M00])[kw   ];
+         //real mfbcb = (D.f[DIR_0P0])[kn   ];
+         //real mfbab = (D.f[DIR_0M0])[ks   ];
+         //real mfbbc = (D.f[DIR_00P])[kt   ];
+         //real mfbba = (D.f[DIR_00M])[kb   ];
+         //real mfccb = (D.f[DIR_PP0])[kne  ];
+         //real mfaab = (D.f[DIR_MM0])[ksw  ];
+         //real mfcab = (D.f[DIR_PM0])[kse  ];
+         //real mfacb = (D.f[DIR_MP0])[knw  ];
+         //real mfcbc = (D.f[DIR_P0P])[kte  ];
+         //real mfaba = (D.f[DIR_M0M])[kbw  ];
+         //real mfcba = (D.f[DIR_P0M])[kbe  ];
+         //real mfabc = (D.f[DIR_M0P])[ktw  ];
+         //real mfbcc = (D.f[DIR_0PP])[ktn  ];
+         //real mfbaa = (D.f[DIR_0MM])[kbs  ];
+         //real mfbca = (D.f[DIR_0PM])[kbn  ];
+         //real mfbac = (D.f[DIR_0MP])[kts  ];
+         //real mfbbb = (D.f[DIR_000])[kzero];
+         //real mfccc = (D.f[DIR_PPP])[ktne ];
+         //real mfaac = (D.f[DIR_MMP])[ktsw ];
+         //real mfcac = (D.f[DIR_PMP])[ktse ];
+         //real mfacc = (D.f[DIR_MPP])[ktnw ];
+         //real mfcca = (D.f[DIR_PPM])[kbne ];
+         //real mfaaa = (D.f[DIR_MMM])[kbsw ];
+         //real mfcaa = (D.f[DIR_PMM])[kbse ];
+         //real mfaca = (D.f[DIR_MPM])[kbnw ];
+         ////////////////////////////////////////////////////////////////////////////////////
+         //real rho   = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) +
+         //				(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
+         //				((mfabb+mfcbb) + (mfbab+mfbcb)) + (mfbba+mfbbc)) + mfbbb) + one;//!!!!Caution: + one
+         ////////////////////////////////////////////////////////////////////////////////////
+         real rho = rhoBC[k];
+         ////////////////////////////////////////////////////////////////////////////////////
+         real OoRho = c1o1 / (rho * 1.5f);
+         ////////////////////////////////////////////////////////////////////////////////////
+         real vvx    = ((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
+                       (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
+                         (mfcbb-mfabb)) * OoRho;
+         real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
+                         (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
+                           (mfbcb-mfbab)) * OoRho;
+         real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
+                         (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
+                           (mfbbc-mfbba)) * OoRho;
+         /////////////////////////
+         //Test Values
+         //double vvx    = 0.016;
+         //double vvy    = zero;
+         //double vvz    = zero;
+         ////////////////////////////////////////////////////////////////////////////////////////
+         ////round off error test
+         //if(vvx!=zero){
+         //	(kDistTest.f[DIR_P00])[k] = mfabb;
+         //	(kDistTest.f[DIR_M00])[k] = mfcbb;
+         //	(kDistTest.f[DIR_0P0])[k] = mfbab;
+         //	(kDistTest.f[DIR_0M0])[k] = mfbcb;
+         //	(kDistTest.f[DIR_00P])[k] = mfbba;
+         //	(kDistTest.f[DIR_00M])[k] = mfbbc;
+         //	(kDistTest.f[DIR_PP0])[k] = mfaab;
+         //	(kDistTest.f[DIR_MM0])[k] = mfccb;
+         //	(kDistTest.f[DIR_PM0])[k] = mfacb;
+         //	(kDistTest.f[DIR_MP0])[k] = mfcab;
+         //	(kDistTest.f[DIR_P0P])[k] = mfaba;
+         //	(kDistTest.f[DIR_M0M])[k] = mfcbc;
+         //	(kDistTest.f[DIR_P0M])[k] = mfabc;
+         //	(kDistTest.f[DIR_M0P])[k] = mfcba;
+         //	(kDistTest.f[DIR_0PP])[k] = mfbaa;
+         //	(kDistTest.f[DIR_0MM])[k] = mfbcc;
+         //	(kDistTest.f[DIR_0PM])[k] = mfbac;
+         //	(kDistTest.f[DIR_0MP])[k] = mfbca;
+         //	(kDistTest.f[DIR_000])[k] = KQK;
+         //	(kDistTest.f[DIR_PPP])[k] = mfaaa;
+         //	(kDistTest.f[DIR_MMP])[k] = mfcca;
+         //	(kDistTest.f[DIR_PMP])[k] = mfaca;
+         //	(kDistTest.f[DIR_MPP])[k] = mfcaa;
+         //	(kDistTest.f[DIR_PPM])[k] = mfaac;
+         //	(kDistTest.f[DIR_MMM])[k] = mfccc;
+         //	(kDistTest.f[DIR_PMM])[k] = mfacc;
+         //	(kDistTest.f[DIR_MPM])[k] = mfcac;
+         //}else{
+         //	(kDistTest.f[DIR_P00])[k] = zero;
+         //	(kDistTest.f[DIR_M00])[k] = zero;
+         //	(kDistTest.f[DIR_0P0])[k] = zero;
+         //	(kDistTest.f[DIR_0M0])[k] = zero;
+         //	(kDistTest.f[DIR_00P])[k] = zero;
+         //	(kDistTest.f[DIR_00M])[k] = zero;
+         //	(kDistTest.f[DIR_PP0])[k] = zero;
+         //	(kDistTest.f[DIR_MM0])[k] = zero;
+         //	(kDistTest.f[DIR_PM0])[k] = zero;
+         //	(kDistTest.f[DIR_MP0])[k] = zero;
+         //	(kDistTest.f[DIR_P0P])[k] = zero;
+         //	(kDistTest.f[DIR_M0M])[k] = zero;
+         //	(kDistTest.f[DIR_P0M])[k] = zero;
+         //	(kDistTest.f[DIR_M0P])[k] = zero;
+         //	(kDistTest.f[DIR_0PP])[k] = zero;
+         //	(kDistTest.f[DIR_0MM])[k] = zero;
+         //	(kDistTest.f[DIR_0PM])[k] = zero;
+         //	(kDistTest.f[DIR_0MP])[k] = zero;
+         //	(kDistTest.f[DIR_000])[k] = zero;
+         //	(kDistTest.f[DIR_PPP])[k] = zero;
+         //	(kDistTest.f[DIR_MMP])[k] = zero;
+         //	(kDistTest.f[DIR_PMP])[k] = zero;
+         //	(kDistTest.f[DIR_MPP])[k] = zero;
+         //	(kDistTest.f[DIR_PPM])[k] = zero;
+         //	(kDistTest.f[DIR_MMM])[k] = zero;
+         //	(kDistTest.f[DIR_PMM])[k] = zero;
+         //	(kDistTest.f[DIR_MPM])[k] = zero;
+         //}
+
+         //////////////////////////////////////////////////////////////////////////////////////
+         //// first bad fix for negative x velocity
+         ////if(vvx > zero) vvx = zero;
+         //////////////////////////////////////////////////////////////////////////////////////
+         ////// second bad fix for negative x velocity
+         ////if(vvx > zero){
+         ////	vvx = -vvx;
+         ////	vvy = -vvy;
+         ////	vvz = -vvz;
+         ////}
+         ////////////////////////////////////////////////////////////////////////////////////
+         double vx2    = vvx * vvx;
+         double vy2    = vvy * vvy;
+         double vz2    = vvz * vvz;
+         //////////////////////////////////////////////////////////////////////////////////
+         //original
             real XXb    = -c2o3 + vx2;
             real XXc    = -c1o2 * (XXb + c1o1 + vvx);
             real XXa    = XXc + vvx;
@@ -4104,213 +4119,213 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
             real ZZb    = -c2o3 + vz2;
             real ZZc    = -c1o2 * (ZZb + c1o1 + vvz);
             real ZZa    = ZZc + vvz;
-			//////////////////////////////////////////////////////////////////////////////////
-			//unkonditioniert
-            mfcbb = -(rhoBC[k] + c1o1) * XXc * YYb * ZZb - c2o27; 
-			mfabb = -(rhoBC[k] + c1o1) * XXa * YYb * ZZb - c2o27;
-			mfbcb = -(rhoBC[k] + c1o1) * XXb * YYc * ZZb - c2o27;
-			mfbab = -(rhoBC[k] + c1o1) * XXb * YYa * ZZb - c2o27;
-			mfbbc = -(rhoBC[k] + c1o1) * XXb * YYb * ZZc - c2o27;
-			mfbba = -(rhoBC[k] + c1o1) * XXb * YYb * ZZa - c2o27;
-			mfccb = -(rhoBC[k] + c1o1) * XXc * YYc * ZZb - c1o54;
-			mfaab = -(rhoBC[k] + c1o1) * XXa * YYa * ZZb - c1o54;
-			mfcab = -(rhoBC[k] + c1o1) * XXc * YYa * ZZb - c1o54;
-			mfacb = -(rhoBC[k] + c1o1) * XXa * YYc * ZZb - c1o54;
-			mfcbc = -(rhoBC[k] + c1o1) * XXc * YYb * ZZc - c1o54;
-			mfaba = -(rhoBC[k] + c1o1) * XXa * YYb * ZZa - c1o54;
-			mfcba = -(rhoBC[k] + c1o1) * XXc * YYb * ZZa - c1o54;
-			mfabc = -(rhoBC[k] + c1o1) * XXa * YYb * ZZc - c1o54;
-			mfbcc = -(rhoBC[k] + c1o1) * XXb * YYc * ZZc - c1o54;
-			mfbaa = -(rhoBC[k] + c1o1) * XXb * YYa * ZZa - c1o54;
-			mfbca = -(rhoBC[k] + c1o1) * XXb * YYc * ZZa - c1o54;
-			mfbac = -(rhoBC[k] + c1o1) * XXb * YYa * ZZc - c1o54;
-			mfbbb = -(rhoBC[k] + c1o1) * XXb * YYb * ZZb - c8o27;
-			mfccc = -(rhoBC[k] + c1o1) * XXc * YYc * ZZc - c1o216;
-			mfaac = -(rhoBC[k] + c1o1) * XXa * YYa * ZZc - c1o216;
-			mfcac = -(rhoBC[k] + c1o1) * XXc * YYa * ZZc - c1o216;
-			mfacc = -(rhoBC[k] + c1o1) * XXa * YYc * ZZc - c1o216;
-			mfcca = -(rhoBC[k] + c1o1) * XXc * YYc * ZZa - c1o216;
-			mfaaa = -(rhoBC[k] + c1o1) * XXa * YYa * ZZa - c1o216;
-			mfcaa = -(rhoBC[k] + c1o1) * XXc * YYa * ZZa - c1o216;
-			mfaca = -(rhoBC[k] + c1o1) * XXa * YYc * ZZa - c1o216;
-			//////////////////////////////////////////////////////////
-			////konditioniert
-			//double OneOver216RhoPlusOne = c1over216*(rhoBC[k]+one);
-			//double OnoOver216Rho        = c1over216*rhoBC[k];
-			//mfcbb = OnoOver216Rho*sixteen + OneOver216RhoPlusOne*twelve*(-(two*vy2) - two*vz2 + three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-two + three*vy2)*(-two + three*vz2));
-			//mfabb = OnoOver216Rho*sixteen - OneOver216RhoPlusOne*twelve*(two*vy2 + two*vz2 - three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-four + six*vy2 + six*vz2 - nine*vy2*vz2));
-			//mfbcb = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(one + three*vvy + three*vy2)*(-two + three*vz2));
-			//mfbab = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvy*(-two + three*vx2)*(-two + three*vz2) - one*vx2*(one + three*vy2)*(-two + three*vz2) + two*(-(two*vy2) + vz2 + three*vy2*vz2)));
-			//mfbbc = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(-two + three*vy2)*(one + three*vvz + three*vz2));
-			//mfbba = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz*(-two + three*vx2)*(-two + three*vy2) - one*vx2*(-two + three*vy2)*(one + three*vz2) + two*(vy2 - two*vz2 + three*vy2*vz2)));
-			//mfccb = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2))));
-			//mfaab = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2))));
-			//mfcab = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2)));
-			//mfacb = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2)));
-			//mfcbc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2))));
-			//mfaba = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2))));
-			//mfcba = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2)));
-			//mfabc = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2)));
-			//mfbcc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2))));
-			//mfbaa = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2))));
-			//mfbca = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2)));
-			//mfbac = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2)));
-			//mfbbb = eight*(eight*OnoOver216Rho + OneOver216RhoPlusOne*three*(four*vy2 + four*vz2 - six*vy2*vz2 + vx2*(-two + three*vy2)*(-two + three*vz2)));
-			//mfccc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
-			//mfaac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
-			//mfcac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
-			//mfacc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
-			//mfcca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
-			//mfaaa = OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz - one*vx2 + three*vvz*vx2 - one*vy2 + three*vvz*vy2 - three*vx2*vy2 + nine*vvz*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
-			//mfcaa = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
-			//mfaca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
+         //////////////////////////////////////////////////////////////////////////////////
+         //unconditioned
+            mfcbb = -(rhoBC[k] + c1o1) * XXc * YYb * ZZb - c2o27;
+         mfabb = -(rhoBC[k] + c1o1) * XXa * YYb * ZZb - c2o27;
+         mfbcb = -(rhoBC[k] + c1o1) * XXb * YYc * ZZb - c2o27;
+         mfbab = -(rhoBC[k] + c1o1) * XXb * YYa * ZZb - c2o27;
+         mfbbc = -(rhoBC[k] + c1o1) * XXb * YYb * ZZc - c2o27;
+         mfbba = -(rhoBC[k] + c1o1) * XXb * YYb * ZZa - c2o27;
+         mfccb = -(rhoBC[k] + c1o1) * XXc * YYc * ZZb - c1o54;
+         mfaab = -(rhoBC[k] + c1o1) * XXa * YYa * ZZb - c1o54;
+         mfcab = -(rhoBC[k] + c1o1) * XXc * YYa * ZZb - c1o54;
+         mfacb = -(rhoBC[k] + c1o1) * XXa * YYc * ZZb - c1o54;
+         mfcbc = -(rhoBC[k] + c1o1) * XXc * YYb * ZZc - c1o54;
+         mfaba = -(rhoBC[k] + c1o1) * XXa * YYb * ZZa - c1o54;
+         mfcba = -(rhoBC[k] + c1o1) * XXc * YYb * ZZa - c1o54;
+         mfabc = -(rhoBC[k] + c1o1) * XXa * YYb * ZZc - c1o54;
+         mfbcc = -(rhoBC[k] + c1o1) * XXb * YYc * ZZc - c1o54;
+         mfbaa = -(rhoBC[k] + c1o1) * XXb * YYa * ZZa - c1o54;
+         mfbca = -(rhoBC[k] + c1o1) * XXb * YYc * ZZa - c1o54;
+         mfbac = -(rhoBC[k] + c1o1) * XXb * YYa * ZZc - c1o54;
+         mfbbb = -(rhoBC[k] + c1o1) * XXb * YYb * ZZb - c8o27;
+         mfccc = -(rhoBC[k] + c1o1) * XXc * YYc * ZZc - c1o216;
+         mfaac = -(rhoBC[k] + c1o1) * XXa * YYa * ZZc - c1o216;
+         mfcac = -(rhoBC[k] + c1o1) * XXc * YYa * ZZc - c1o216;
+         mfacc = -(rhoBC[k] + c1o1) * XXa * YYc * ZZc - c1o216;
+         mfcca = -(rhoBC[k] + c1o1) * XXc * YYc * ZZa - c1o216;
+         mfaaa = -(rhoBC[k] + c1o1) * XXa * YYa * ZZa - c1o216;
+         mfcaa = -(rhoBC[k] + c1o1) * XXc * YYa * ZZa - c1o216;
+         mfaca = -(rhoBC[k] + c1o1) * XXa * YYc * ZZa - c1o216;
+         //////////////////////////////////////////////////////////
+         ////conditioned
+         //double OneOver216RhoPlusOne = c1over216*(rhoBC[k]+one);
+         //double OnoOver216Rho        = c1over216*rhoBC[k];
+         //mfcbb = OnoOver216Rho*sixteen + OneOver216RhoPlusOne*twelve*(-(two*vy2) - two*vz2 + three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-two + three*vy2)*(-two + three*vz2));
+         //mfabb = OnoOver216Rho*sixteen - OneOver216RhoPlusOne*twelve*(two*vy2 + two*vz2 - three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-four + six*vy2 + six*vz2 - nine*vy2*vz2));
+         //mfbcb = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(one + three*vvy + three*vy2)*(-two + three*vz2));
+         //mfbab = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvy*(-two + three*vx2)*(-two + three*vz2) - one*vx2*(one + three*vy2)*(-two + three*vz2) + two*(-(two*vy2) + vz2 + three*vy2*vz2)));
+         //mfbbc = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(-two + three*vy2)*(one + three*vvz + three*vz2));
+         //mfbba = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz*(-two + three*vx2)*(-two + three*vy2) - one*vx2*(-two + three*vy2)*(one + three*vz2) + two*(vy2 - two*vz2 + three*vy2*vz2)));
+         //mfccb = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2))));
+         //mfaab = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2))));
+         //mfcab = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2)));
+         //mfacb = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2)));
+         //mfcbc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2))));
+         //mfaba = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2))));
+         //mfcba = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2)));
+         //mfabc = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2)));
+         //mfbcc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2))));
+         //mfbaa = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2))));
+         //mfbca = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2)));
+         //mfbac = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2)));
+         //mfbbb = eight*(eight*OnoOver216Rho + OneOver216RhoPlusOne*three*(four*vy2 + four*vz2 - six*vy2*vz2 + vx2*(-two + three*vy2)*(-two + three*vz2)));
+         //mfccc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
+         //mfaac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
+         //mfcac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
+         //mfacc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
+         //mfcca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
+         //mfaaa = OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz - one*vx2 + three*vvz*vx2 - one*vy2 + three*vvz*vy2 - three*vx2*vy2 + nine*vvz*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
+         //mfcaa = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
+         //mfaca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //if (isEvenTimestep==true)
       //{
-      //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      //} 
+      //   D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+      //   D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+      //   D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+      //   D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+      //   D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+      //   D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+      //   D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+      //   D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+      //   D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+      //   D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+      //   D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+      //   D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+      //   D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+      //   D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+      //   D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+      //   D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+      //   D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+      //   D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+      //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+      //   D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+      //   D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+      //   D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+      //   D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+      //   D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+      //   D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+      //   D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+      //   D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
+      //}
       //else
       //{
-      //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      //   D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+      //   D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+      //   D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+      //   D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+      //   D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+      //   D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+      //   D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+      //   D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+      //   D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+      //   D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+      //   D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+      //   D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+      //   D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+      //   D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+      //   D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+      //   D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+      //   D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+      //   D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+      //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+      //   D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+      //   D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+      //   D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+      //   D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+      //   D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+      //   D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+      //   D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+      //   D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
       //}
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
 
-			(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
-			(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
-			(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
-			(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
-			(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
-			(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
-			(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
-			(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
-			(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
-			(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
-			(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
-			(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
-			(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
-			(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
-			(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
-			(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
-			(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
-			(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
-			(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
-			(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
-			(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
-			(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
-			(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
-			(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
-			(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
-			(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
-			(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
-			//(D.f[DIR_P00   ])[ke   ] = mfcbb;
-			//(D.f[DIR_M00   ])[kw   ] = mfabb;
-			//(D.f[DIR_0P0   ])[kn   ] = mfbcb;
-			//(D.f[DIR_0M0   ])[ks   ] = mfbab;
-			//(D.f[DIR_00P   ])[kt   ] = mfbbc;
-			//(D.f[DIR_00M   ])[kb   ] = mfbba;
-			//(D.f[DIR_PP0  ])[kne  ] = mfccb;
-			//(D.f[DIR_MM0  ])[ksw  ] = mfaab;
-			//(D.f[DIR_PM0  ])[kse  ] = mfcab;
-			//(D.f[DIR_MP0  ])[knw  ] = mfacb;
-			//(D.f[DIR_P0P  ])[kte  ] = mfcbc;
-			//(D.f[DIR_M0M  ])[kbw  ] = mfaba;
-			//(D.f[DIR_P0M  ])[kbe  ] = mfcba;
-			//(D.f[DIR_M0P  ])[ktw  ] = mfabc;
-			//(D.f[DIR_0PP  ])[ktn  ] = mfbcc;
-			//(D.f[DIR_0MM  ])[kbs  ] = mfbaa;
-			//(D.f[DIR_0PM  ])[kbn  ] = mfbca;
-			//(D.f[DIR_0MP  ])[kts  ] = mfbac;
-			//(D.f[DIR_000])[kzero] = mfbbb;
-			//(D.f[DIR_PPP ])[ktne ] = mfccc;
-			//(D.f[DIR_MMP ])[ktsw ] = mfaac;
-			//(D.f[DIR_PMP ])[ktse ] = mfcac;
-			//(D.f[DIR_MPP ])[ktnw ] = mfacc;
-			//(D.f[DIR_PPM ])[kbne ] = mfcca;
-			//(D.f[DIR_MMM ])[kbsw ] = mfaaa;
-			//(D.f[DIR_PMM ])[kbse ] = mfcaa;
-			//(D.f[DIR_MPM ])[kbnw ] = mfaca;
-
-      //(D.f[DIR_P00   ])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;  
-      //(D.f[DIR_M00   ])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;  
-      //(D.f[DIR_0P0   ])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;  
-      //(D.f[DIR_0M0   ])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;  
-      //(D.f[DIR_00P   ])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;  
-      //(D.f[DIR_00M   ])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;  
-      //(D.f[DIR_PP0  ])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;  
-      //(D.f[DIR_MM0  ])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;  
-      //(D.f[DIR_PM0  ])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;  
-      //(D.f[DIR_MP0  ])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;  
-      //(D.f[DIR_P0P  ])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;  
-      //(D.f[DIR_M0M  ])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;  
-      //(D.f[DIR_P0M  ])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;  
-      //(D.f[DIR_M0P  ])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;  
-      //(D.f[DIR_0PP  ])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;  
-      //(D.f[DIR_0MM  ])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;  
-      //(D.f[DIR_0PM  ])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;  
-      //(D.f[DIR_0MP  ])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;  
+         (D.f[DIR_P00])[ke   ] = mfabb;//mfcbb;
+         (D.f[DIR_M00])[kw   ] = mfcbb;//mfabb;
+         (D.f[DIR_0P0])[kn   ] = mfbab;//mfbcb;
+         (D.f[DIR_0M0])[ks   ] = mfbcb;//mfbab;
+         (D.f[DIR_00P])[kt   ] = mfbba;//mfbbc;
+         (D.f[DIR_00M])[kb   ] = mfbbc;//mfbba;
+         (D.f[DIR_PP0])[kne  ] = mfaab;//mfccb;
+         (D.f[DIR_MM0])[ksw  ] = mfccb;//mfaab;
+         (D.f[DIR_PM0])[kse  ] = mfacb;//mfcab;
+         (D.f[DIR_MP0])[knw  ] = mfcab;//mfacb;
+         (D.f[DIR_P0P])[kte  ] = mfaba;//mfcbc;
+         (D.f[DIR_M0M])[kbw  ] = mfcbc;//mfaba;
+         (D.f[DIR_P0M])[kbe  ] = mfabc;//mfcba;
+         (D.f[DIR_M0P])[ktw  ] = mfcba;//mfabc;
+         (D.f[DIR_0PP])[ktn  ] = mfbaa;//mfbcc;
+         (D.f[DIR_0MM])[kbs  ] = mfbcc;//mfbaa;
+         (D.f[DIR_0PM])[kbn  ] = mfbac;//mfbca;
+         (D.f[DIR_0MP])[kts  ] = mfbca;//mfbac;
+         (D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
+         (D.f[DIR_PPP])[ktne ] = mfaaa;//mfccc;
+         (D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac;
+         (D.f[DIR_PMP])[ktse ] = mfaca;//mfcac;
+         (D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc;
+         (D.f[DIR_PPM])[kbne ] = mfaac;//mfcca;
+         (D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa;
+         (D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa;
+         (D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca;
+         //(D.f[DIR_P00])[ke   ] = mfcbb;
+         //(D.f[DIR_M00])[kw   ] = mfabb;
+         //(D.f[DIR_0P0])[kn   ] = mfbcb;
+         //(D.f[DIR_0M0])[ks   ] = mfbab;
+         //(D.f[DIR_00P])[kt   ] = mfbbc;
+         //(D.f[DIR_00M])[kb   ] = mfbba;
+         //(D.f[DIR_PP0])[kne  ] = mfccb;
+         //(D.f[DIR_MM0])[ksw  ] = mfaab;
+         //(D.f[DIR_PM0])[kse  ] = mfcab;
+         //(D.f[DIR_MP0])[knw  ] = mfacb;
+         //(D.f[DIR_P0P])[kte  ] = mfcbc;
+         //(D.f[DIR_M0M])[kbw  ] = mfaba;
+         //(D.f[DIR_P0M])[kbe  ] = mfcba;
+         //(D.f[DIR_M0P])[ktw  ] = mfabc;
+         //(D.f[DIR_0PP])[ktn  ] = mfbcc;
+         //(D.f[DIR_0MM])[kbs  ] = mfbaa;
+         //(D.f[DIR_0PM])[kbn  ] = mfbca;
+         //(D.f[DIR_0MP])[kts  ] = mfbac;
+         //(D.f[DIR_000])[kzero] = mfbbb;
+         //(D.f[DIR_PPP])[ktne ] = mfccc;
+         //(D.f[DIR_MMP])[ktsw ] = mfaac;
+         //(D.f[DIR_PMP])[ktse ] = mfcac;
+         //(D.f[DIR_MPP])[ktnw ] = mfacc;
+         //(D.f[DIR_PPM])[kbne ] = mfcca;
+         //(D.f[DIR_MMM])[kbsw ] = mfaaa;
+         //(D.f[DIR_PMM])[kbse ] = mfcaa;
+         //(D.f[DIR_MPM])[kbnw ] = mfaca;
+
+      //(D.f[DIR_P00])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;
+      //(D.f[DIR_M00])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;
+      //(D.f[DIR_0P0])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;
+      //(D.f[DIR_0M0])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;
+      //(D.f[DIR_00P])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;
+      //(D.f[DIR_00M])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;
+      //(D.f[DIR_PP0])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;
+      //(D.f[DIR_MM0])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;
+      //(D.f[DIR_PM0])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;
+      //(D.f[DIR_MP0])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;
+      //(D.f[DIR_P0P])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;
+      //(D.f[DIR_M0M])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;
+      //(D.f[DIR_P0M])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;
+      //(D.f[DIR_M0P])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;
+      //(D.f[DIR_0PP])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;
+      //(D.f[DIR_0MM])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;
+      //(D.f[DIR_0PM])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;
+      //(D.f[DIR_0MP])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;
       //(D.f[DIR_000])[kzero] = fZERO;//f1_ZERO; //fZERO; //fZERO;
-      //(D.f[DIR_PPP ])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE; 
-      //(D.f[DIR_MMM ])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW; 
-      //(D.f[DIR_PPM ])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE; 
-      //(D.f[DIR_MMP ])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW; 
-      //(D.f[DIR_PMP ])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE; 
-      //(D.f[DIR_MPM ])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW; 
-      //(D.f[DIR_PMM ])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE; 
-      //(D.f[DIR_MPP ])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW; 
+      //(D.f[DIR_PPP])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE;
+      //(D.f[DIR_MMM])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW;
+      //(D.f[DIR_PPM])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE;
+      //(D.f[DIR_MMP])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW;
+      //(D.f[DIR_PMP])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE;
+      //(D.f[DIR_MPM])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW;
+      //(D.f[DIR_PMM])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE;
+      //(D.f[DIR_MPP])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4354,19 +4369,20 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceZero27(	 real* DD, 
-												 int* k_Q, 
-												 unsigned int numberOfBCnodes, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QPressDeviceZero27(
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // global x-index
+   const unsigned  y = blockIdx.x;   // global y-index
+   const unsigned  z = blockIdx.y;   // global z-index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -4410,94 +4426,94 @@ __global__ void QPressDeviceZero27(	 real* DD,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
-	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      (D.f[DIR_P00   ])[ke   ] =c0o1;
-      (D.f[DIR_M00   ])[kw   ] =c0o1;
-      (D.f[DIR_0P0   ])[kn   ] =c0o1;
-      (D.f[DIR_0M0   ])[ks   ] =c0o1;
-      (D.f[DIR_00P   ])[kt   ] =c0o1;
-      (D.f[DIR_00M   ])[kb   ] =c0o1;
-      (D.f[DIR_PP0  ])[kne  ] =c0o1;
-      (D.f[DIR_MM0  ])[ksw  ] =c0o1;
-      (D.f[DIR_PM0  ])[kse  ] =c0o1;
-      (D.f[DIR_MP0  ])[knw  ] =c0o1;
-      (D.f[DIR_P0P  ])[kte  ] =c0o1;
-      (D.f[DIR_M0M  ])[kbw  ] =c0o1;
-      (D.f[DIR_P0M  ])[kbe  ] =c0o1;
-      (D.f[DIR_M0P  ])[ktw  ] =c0o1;
-      (D.f[DIR_0PP  ])[ktn  ] =c0o1;
-      (D.f[DIR_0MM  ])[kbs  ] =c0o1;
-      (D.f[DIR_0PM  ])[kbn  ] =c0o1;
-      (D.f[DIR_0MP  ])[kts  ] =c0o1;
+     //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      (D.f[DIR_P00])[ke   ] =c0o1;
+      (D.f[DIR_M00])[kw   ] =c0o1;
+      (D.f[DIR_0P0])[kn   ] =c0o1;
+      (D.f[DIR_0M0])[ks   ] =c0o1;
+      (D.f[DIR_00P])[kt   ] =c0o1;
+      (D.f[DIR_00M])[kb   ] =c0o1;
+      (D.f[DIR_PP0])[kne  ] =c0o1;
+      (D.f[DIR_MM0])[ksw  ] =c0o1;
+      (D.f[DIR_PM0])[kse  ] =c0o1;
+      (D.f[DIR_MP0])[knw  ] =c0o1;
+      (D.f[DIR_P0P])[kte  ] =c0o1;
+      (D.f[DIR_M0M])[kbw  ] =c0o1;
+      (D.f[DIR_P0M])[kbe  ] =c0o1;
+      (D.f[DIR_M0P])[ktw  ] =c0o1;
+      (D.f[DIR_0PP])[ktn  ] =c0o1;
+      (D.f[DIR_0MM])[kbs  ] =c0o1;
+      (D.f[DIR_0PM])[kbn  ] =c0o1;
+      (D.f[DIR_0MP])[kts  ] =c0o1;
       (D.f[DIR_000])[kzero] =c0o1;
-      (D.f[DIR_PPP ])[ktne ] =c0o1;
-      (D.f[DIR_MMP ])[ktsw ] =c0o1;
-      (D.f[DIR_PMP ])[ktse ] =c0o1;
-      (D.f[DIR_MPP ])[ktnw ] =c0o1;
-      (D.f[DIR_PPM ])[kbne ] =c0o1;
-      (D.f[DIR_MMM ])[kbsw ] =c0o1;
-      (D.f[DIR_PMM ])[kbse ] =c0o1;
-      (D.f[DIR_MPM ])[kbnw ] =c0o1;
+      (D.f[DIR_PPP])[ktne ] =c0o1;
+      (D.f[DIR_MMP])[ktsw ] =c0o1;
+      (D.f[DIR_PMP])[ktse ] =c0o1;
+      (D.f[DIR_MPP])[ktnw ] =c0o1;
+      (D.f[DIR_PPM])[kbne ] =c0o1;
+      (D.f[DIR_MMM])[kbsw ] =c0o1;
+      (D.f[DIR_PMM])[kbse ] =c0o1;
+      (D.f[DIR_MPM])[kbnw ] =c0o1;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4541,22 +4557,23 @@ __global__ void QPressDeviceZero27(	 real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceFake27(	 real* rhoBC,
-												 real* DD, 
-												 int* k_Q, 
-												 int* k_N, 
-												 int numberOfBCnodes, 
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QPressDeviceFake27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // global x-index
+   const unsigned  y = blockIdx.x;   // global y-index
+   const unsigned  z = blockIdx.y;   // global z-index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -4630,148 +4647,148 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
          f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3;
       vx1    =  ((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                   ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
-                  (f1_E - f1_W); 
+                  (f1_E - f1_W);
 
 
       vx2    =   (-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                   ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
-                  (f1_N - f1_S); 
+                  (f1_N - f1_S);
 
       vx3    =   ((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
                   (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
-                  (f1_T - f1_B); 
+                  (f1_T - f1_B);
 
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
          f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
-	  //drho1 = (drho1 + rhoBC[k])/2.f;
-	  drho1 = drho1 - rhoBC[k];
+     //drho1 = (drho1 + rhoBC[k])/2.f;
+     drho1 = drho1 - rhoBC[k];
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = c2o27* (rhoBC[k]+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-      (D.f[DIR_M00   ])[kw   ] = c2o27* (rhoBC[k]+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-      (D.f[DIR_0P0   ])[kn   ] = c2o27* (rhoBC[k]+c3o1*(    -vx2    )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-      (D.f[DIR_0M0   ])[ks   ] = c2o27* (rhoBC[k]+c3o1*(     vx2    )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-      (D.f[DIR_00P   ])[kt   ] = c2o27* (rhoBC[k]+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-      (D.f[DIR_00M   ])[kb   ] = c2o27* (rhoBC[k]+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P00])[ke   ] = c2o27* (rhoBC[k]+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+      (D.f[DIR_M00])[kw   ] = c2o27* (rhoBC[k]+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+      (D.f[DIR_0P0])[kn   ] = c2o27* (rhoBC[k]+c3o1*(    -vx2    )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+      (D.f[DIR_0M0])[ks   ] = c2o27* (rhoBC[k]+c3o1*(     vx2    )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+      (D.f[DIR_00P])[kt   ] = c2o27* (rhoBC[k]+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+      (D.f[DIR_00M])[kb   ] = c2o27* (rhoBC[k]+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+      (D.f[DIR_PP0])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PM0])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MP0])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0P])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MP])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
+      (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4815,461 +4832,462 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////
-__global__ void QPressDevice27_IntBB(real* rho,
-												real* DD, 
-												int* k_Q, 
-												real* QQ,
-												unsigned int numberOfBCnodes, 
-												real om1, 
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat, 
-												bool isEvenTimestep)
+__global__ void QPressDevice27_IntBB(
+    real* rho,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-	Distributions27 D;
-	if (isEvenTimestep==true)
-	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-	} 
-	else
-	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if(k < numberOfBCnodes)
-	{
-		////////////////////////////////////////////////////////////////////////////////
-		//real VeloX = vx[k];
-		//real VeloY = vy[k];
-		//real VeloZ = vz[k]; //(16.0*(u0*2.0)*bbx*bby*(grid_nx-bbx)*(grid_ny-bby))/(grid_nx*grid_nx*grid_ny*grid_ny)
-		////////////////////////////////////////////////////////////////////////////////
-		real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
-			*q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
-			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
-			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
-		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
-		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
-		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
-		q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
-		q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
-		q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
-		q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
-		q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
-		////////////////////////////////////////////////////////////////////////////////
-		//index
-		unsigned int KQK  = k_Q[k];
-		unsigned int kzero= KQK;
-		unsigned int ke   = KQK;
-		unsigned int kw   = neighborX[KQK];
-		unsigned int kn   = KQK;
-		unsigned int ks   = neighborY[KQK];
-		unsigned int kt   = KQK;
-		unsigned int kb   = neighborZ[KQK];
-		unsigned int ksw  = neighborY[kw];
-		unsigned int kne  = KQK;
-		unsigned int kse  = ks;
-		unsigned int knw  = kw;
-		unsigned int kbw  = neighborZ[kw];
-		unsigned int kte  = KQK;
-		unsigned int kbe  = kb;
-		unsigned int ktw  = kw;
-		unsigned int kbs  = neighborZ[ks];
-		unsigned int ktn  = KQK;
-		unsigned int kbn  = kb;
-		unsigned int kts  = ks;
-		unsigned int ktse = ks;
-		unsigned int kbnw = kbw;
-		unsigned int ktnw = kw;
-		unsigned int kbse = kbs;
-		unsigned int ktsw = ksw;
-		unsigned int kbne = kb;
-		unsigned int ktne = KQK;
-		unsigned int kbsw = neighborZ[ksw];
-		////////////////////////////////////////////////////////////////////////////////
-		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
-			f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-
-		f_W    = (D.f[DIR_P00   ])[ke   ];
-		f_E    = (D.f[DIR_M00   ])[kw   ];
-		f_S    = (D.f[DIR_0P0   ])[kn   ];
-		f_N    = (D.f[DIR_0M0   ])[ks   ];
-		f_B    = (D.f[DIR_00P   ])[kt   ];
-		f_T    = (D.f[DIR_00M   ])[kb   ];
-		f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-		f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		f_TSE  = (D.f[DIR_MPM ])[kbnw ];
-		////////////////////////////////////////////////////////////////////////////////
-		real vx1, vx2, vx3, drho, feq, q;
-		drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-			f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-			f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
-
-		vx1    = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-			((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-			(f_E - f_W))/(c1o1+drho); 
-
-
-		vx2    =  ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-			((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-			(f_N - f_S))/(c1o1+drho); 
-
-		vx3    =  (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
-			(-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-			(f_T - f_B))/(c1o1+drho); 
-
-		real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
-
-		//////////////////////////////////////////////////////////////////////////
-		if (isEvenTimestep==false)
-		{
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-		} 
-		else
-		{
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-		}
-		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		//Test
-		//(D.f[DIR_000])[k]=c1o10;
-		real rhoDiff = drho - rho[k];
-		real VeloX = vx1;
-		real VeloY = vx2;
-		real VeloZ = vx3;
-		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-		q = q_dirE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*( vx1        )*( vx1        )-cu_sq); 
-			(D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX     )))/(c1o1+q);
-		}
-
-		q = q_dirW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-			(D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX     )))/(c1o1+q);
-		}
-
-		q = q_dirN[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-			(D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY     )))/(c1o1+q);
-		}
-
-		q = q_dirS[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-			(D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY     )))/(c1o1+q);
-		}
-
-		q = q_dirT[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(         vx3)*(         vx3)-cu_sq); 
-			(D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ     )))/(c1o1+q);
-		}
-
-		q = q_dirB[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-			(D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ     )))/(c1o1+q);
-		}
-
-		q = q_dirNE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-			(D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirSW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-			(D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirSE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-			(D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirNW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-			(D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirTE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-			(D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-			(D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-			(D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-			(D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTN[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-			(D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBS[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-			(D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBN[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-			(D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTS[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-			(D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTNE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-			(D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBSW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-			(D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBNE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-			(D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTSW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-			(D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTSE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-			(D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBNW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-			(D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBSE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-			(D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTNW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-			(D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q);
-		}
-	}
+   Distributions27 D;
+   if (isEvenTimestep==true)
+   {
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+   }
+   else
+   {
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
+   }
+   ////////////////////////////////////////////////////////////////////////////////
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
+
+   const unsigned nx = blockDim.x;
+   const unsigned ny = gridDim.x;
+
+   const unsigned k = nx*(ny*z + y) + x;
+   //////////////////////////////////////////////////////////////////////////
+
+   if(k < numberOfBCnodes)
+   {
+      ////////////////////////////////////////////////////////////////////////////////
+      //real VeloX = vx[k];
+      //real VeloY = vy[k];
+      //real VeloZ = vz[k]; //(16.0*(u0*2.0)*bbx*bby*(grid_nx-bbx)*(grid_ny-bby))/(grid_nx*grid_nx*grid_ny*grid_ny)
+      ////////////////////////////////////////////////////////////////////////////////
+      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
+         *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
+         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
+         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
+         *q_dirBSE, *q_dirBNW;
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
+      ////////////////////////////////////////////////////////////////////////////////
+      //index
+      unsigned int KQK  = k_Q[k];
+      unsigned int kzero= KQK;
+      unsigned int ke   = KQK;
+      unsigned int kw   = neighborX[KQK];
+      unsigned int kn   = KQK;
+      unsigned int ks   = neighborY[KQK];
+      unsigned int kt   = KQK;
+      unsigned int kb   = neighborZ[KQK];
+      unsigned int ksw  = neighborY[kw];
+      unsigned int kne  = KQK;
+      unsigned int kse  = ks;
+      unsigned int knw  = kw;
+      unsigned int kbw  = neighborZ[kw];
+      unsigned int kte  = KQK;
+      unsigned int kbe  = kb;
+      unsigned int ktw  = kw;
+      unsigned int kbs  = neighborZ[ks];
+      unsigned int ktn  = KQK;
+      unsigned int kbn  = kb;
+      unsigned int kts  = ks;
+      unsigned int ktse = ks;
+      unsigned int kbnw = kbw;
+      unsigned int ktnw = kw;
+      unsigned int kbse = kbs;
+      unsigned int ktsw = ksw;
+      unsigned int kbne = kb;
+      unsigned int ktne = KQK;
+      unsigned int kbsw = neighborZ[ksw];
+      ////////////////////////////////////////////////////////////////////////////////
+      real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
+         f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
+
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
+      ////////////////////////////////////////////////////////////////////////////////
+      real vx1, vx2, vx3, drho, feq, q;
+      drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+         f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
+         f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
+
+      vx1    = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+         ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+         (f_E - f_W))/(c1o1+drho);
+
+
+      vx2    =  ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+         ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+         (f_N - f_S))/(c1o1+drho);
+
+      vx3    =  (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+         (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+         (f_T - f_B))/(c1o1+drho);
+
+      real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
+
+      //////////////////////////////////////////////////////////////////////////
+      if (isEvenTimestep==false)
+      {
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
+      }
+      else
+      {
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
+      }
+      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      //Test
+      //(D.f[DIR_000])[k]=c1o10;
+      real rhoDiff = drho - rho[k];
+      real VeloX = vx1;
+      real VeloY = vx2;
+      real VeloZ = vx3;
+      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      q = q_dirE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*( vx1        )*( vx1        )-cu_sq);
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX     )))/(c1o1+q);
+      }
+
+      q = q_dirW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX     )))/(c1o1+q);
+      }
+
+      q = q_dirN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY     )))/(c1o1+q);
+      }
+
+      q = q_dirS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY     )))/(c1o1+q);
+      }
+
+      q = q_dirT[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(         vx3)*(         vx3)-cu_sq);
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ     )))/(c1o1+q);
+      }
+
+      q = q_dirB[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ     )))/(c1o1+q);
+      }
+
+      q = q_dirNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirTE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q);
+      }
+   }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
index 8675780d26e63656b04fdfc1f9836b1eba8d1b87..5d4572e234fdcad072e9b666c911f3250c32346a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
@@ -21,7 +21,7 @@ __global__ void PressSchlaff27(real* rhoBC,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -71,94 +71,94 @@ __global__ void PressSchlaff27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_E    = (D.f[DIR_P00   ])[ke   ];
-      f1_W    = (D.f[DIR_M00   ])[kw   ];
-      f1_N    = (D.f[DIR_0P0   ])[kn   ];
-      f1_S    = (D.f[DIR_0M0   ])[ks   ];
-      f1_T    = (D.f[DIR_00P   ])[kt   ];
-      f1_B    = (D.f[DIR_00M   ])[kb   ];
-      f1_NE   = (D.f[DIR_PP0  ])[kne  ];
-      f1_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      f1_SE   = (D.f[DIR_PM0  ])[kse  ];
-      f1_NW   = (D.f[DIR_MP0  ])[knw  ];
-      f1_TE   = (D.f[DIR_P0P  ])[kte  ];
-      f1_BW   = (D.f[DIR_M0M  ])[kbw  ];
-      f1_BE   = (D.f[DIR_P0M  ])[kbe  ];
-      f1_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      f1_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      f1_BS   = (D.f[DIR_0MM  ])[kbs  ];
-      f1_BN   = (D.f[DIR_0PM  ])[kbn  ];
-      f1_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f1_E    = (D.f[DIR_P00])[ke   ];
+      f1_W    = (D.f[DIR_M00])[kw   ];
+      f1_N    = (D.f[DIR_0P0])[kn   ];
+      f1_S    = (D.f[DIR_0M0])[ks   ];
+      f1_T    = (D.f[DIR_00P])[kt   ];
+      f1_B    = (D.f[DIR_00M])[kb   ];
+      f1_NE   = (D.f[DIR_PP0])[kne  ];
+      f1_SW   = (D.f[DIR_MM0])[ksw  ];
+      f1_SE   = (D.f[DIR_PM0])[kse  ];
+      f1_NW   = (D.f[DIR_MP0])[knw  ];
+      f1_TE   = (D.f[DIR_P0P])[kte  ];
+      f1_BW   = (D.f[DIR_M0M])[kbw  ];
+      f1_BE   = (D.f[DIR_P0M])[kbe  ];
+      f1_TW   = (D.f[DIR_M0P])[ktw  ];
+      f1_TN   = (D.f[DIR_0PP])[ktn  ];
+      f1_BS   = (D.f[DIR_0MM])[kbs  ];
+      f1_BN   = (D.f[DIR_0PM])[kbn  ];
+      f1_TS   = (D.f[DIR_0MP])[kts  ];
       f1_ZERO = (D.f[DIR_000])[kzero];
-      f1_TNE  = (D.f[DIR_PPP ])[ktne ];
-      f1_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      f1_TSE  = (D.f[DIR_PMP ])[ktse ];
-      f1_TNW  = (D.f[DIR_MPP ])[ktnw ];
-      f1_BNE  = (D.f[DIR_PPM ])[kbne ];
-      f1_BSW  = (D.f[DIR_MMM ])[kbsw ];
-      f1_BSE  = (D.f[DIR_PMM ])[kbse ];
-      f1_BNW  = (D.f[DIR_MPM ])[kbnw ];
+      f1_TNE  = (D.f[DIR_PPP])[ktne ];
+      f1_TSW  = (D.f[DIR_MMP])[ktsw ];
+      f1_TSE  = (D.f[DIR_PMP])[ktse ];
+      f1_TNW  = (D.f[DIR_MPP])[ktnw ];
+      f1_BNE  = (D.f[DIR_PPM])[kbne ];
+      f1_BSW  = (D.f[DIR_MMM])[kbsw ];
+      f1_BSE  = (D.f[DIR_PMM])[kbse ];
+      f1_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       real cs       = c1o1/sqrt(c3o1);
       real csp1     = cs + c1o1;
@@ -222,15 +222,15 @@ __global__ void PressSchlaff27(real* rhoBC,
 
       deltaVz0[k] = tempDeltaV;
 
-      (D.f[DIR_00M   ])[kb   ] = f1_B   ;
-      (D.f[DIR_M0M  ])[kbw  ] = f1_BW  ;
-      (D.f[DIR_P0M  ])[kbe  ] = f1_BE  ;
-      (D.f[DIR_0MM  ])[kbs  ] = f1_BS  ;
-      (D.f[DIR_0PM  ])[kbn  ] = f1_BN  ;
-      (D.f[DIR_PPM ])[kbne ] = f1_BNE ;
-      (D.f[DIR_MMM ])[kbsw ] = f1_BSW ;
-      (D.f[DIR_PMM ])[kbse ] = f1_BSE ;
-      (D.f[DIR_MPM ])[kbnw ] = f1_BNW ;
+      (D.f[DIR_00M])[kb   ] = f1_B   ;
+      (D.f[DIR_M0M])[kbw  ] = f1_BW  ;
+      (D.f[DIR_P0M])[kbe  ] = f1_BE  ;
+      (D.f[DIR_0MM])[kbs  ] = f1_BS  ;
+      (D.f[DIR_0PM])[kbn  ] = f1_BN  ;
+      (D.f[DIR_PPM])[kbne ] = f1_BNE ;
+      (D.f[DIR_MMM])[kbsw ] = f1_BSW ;
+      (D.f[DIR_PMM])[kbse ] = f1_BSE ;
+      (D.f[DIR_MPM])[kbnw ] = f1_BNW ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -285,7 +285,7 @@ __global__ void VelSchlaff27(  int t,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -335,122 +335,122 @@ __global__ void VelSchlaff27(  int t,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_E    = (D.f[DIR_P00   ])[ke   ];
-      f1_W    = (D.f[DIR_M00   ])[kw   ];
-      f1_N    = (D.f[DIR_0P0   ])[kn   ];
-      f1_S    = (D.f[DIR_0M0   ])[ks   ];
-      f1_T    = (D.f[DIR_00P   ])[kt   ];
-      f1_B    = (D.f[DIR_00M   ])[kb   ];
-      f1_NE   = (D.f[DIR_PP0  ])[kne  ];
-      f1_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      f1_SE   = (D.f[DIR_PM0  ])[kse  ];
-      f1_NW   = (D.f[DIR_MP0  ])[knw  ];
-      f1_TE   = (D.f[DIR_P0P  ])[kte  ];
-      f1_BW   = (D.f[DIR_M0M  ])[kbw  ];
-      f1_BE   = (D.f[DIR_P0M  ])[kbe  ];
-      f1_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      f1_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      f1_BS   = (D.f[DIR_0MM  ])[kbs  ];
-      f1_BN   = (D.f[DIR_0PM  ])[kbn  ];
-      f1_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f1_E    = (D.f[DIR_P00])[ke   ];
+      f1_W    = (D.f[DIR_M00])[kw   ];
+      f1_N    = (D.f[DIR_0P0])[kn   ];
+      f1_S    = (D.f[DIR_0M0])[ks   ];
+      f1_T    = (D.f[DIR_00P])[kt   ];
+      f1_B    = (D.f[DIR_00M])[kb   ];
+      f1_NE   = (D.f[DIR_PP0])[kne  ];
+      f1_SW   = (D.f[DIR_MM0])[ksw  ];
+      f1_SE   = (D.f[DIR_PM0])[kse  ];
+      f1_NW   = (D.f[DIR_MP0])[knw  ];
+      f1_TE   = (D.f[DIR_P0P])[kte  ];
+      f1_BW   = (D.f[DIR_M0M])[kbw  ];
+      f1_BE   = (D.f[DIR_P0M])[kbe  ];
+      f1_TW   = (D.f[DIR_M0P])[ktw  ];
+      f1_TN   = (D.f[DIR_0PP])[ktn  ];
+      f1_BS   = (D.f[DIR_0MM])[kbs  ];
+      f1_BN   = (D.f[DIR_0PM])[kbn  ];
+      f1_TS   = (D.f[DIR_0MP])[kts  ];
       f1_ZERO = (D.f[DIR_000])[kzero];
-      f1_TNE  = (D.f[DIR_PPP ])[ktne ];
-      f1_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      f1_TSE  = (D.f[DIR_PMP ])[ktse ];
-      f1_TNW  = (D.f[DIR_MPP ])[ktnw ];
-      f1_BNE  = (D.f[DIR_PPM ])[kbne ];
-      f1_BSW  = (D.f[DIR_MMM ])[kbsw ];
-      f1_BSE  = (D.f[DIR_PMM ])[kbse ];
-      f1_BNW  = (D.f[DIR_MPM ])[kbnw ];
-      //f1_W    = (D.f[DIR_P00   ])[ke   ];
-      //f1_E    = (D.f[DIR_M00   ])[kw   ];
-      //f1_S    = (D.f[DIR_0P0   ])[kn   ];
-      //f1_N    = (D.f[DIR_0M0   ])[ks   ];
-      //f1_B    = (D.f[DIR_00P   ])[kt   ];
-      //f1_T    = (D.f[DIR_00M   ])[kb   ];
-      //f1_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //f1_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //f1_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //f1_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //f1_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //f1_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //f1_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //f1_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //f1_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //f1_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //f1_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //f1_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f1_TNE  = (D.f[DIR_PPP])[ktne ];
+      f1_TSW  = (D.f[DIR_MMP])[ktsw ];
+      f1_TSE  = (D.f[DIR_PMP])[ktse ];
+      f1_TNW  = (D.f[DIR_MPP])[ktnw ];
+      f1_BNE  = (D.f[DIR_PPM])[kbne ];
+      f1_BSW  = (D.f[DIR_MMM])[kbsw ];
+      f1_BSE  = (D.f[DIR_PMM])[kbse ];
+      f1_BNW  = (D.f[DIR_MPM])[kbnw ];
+      //f1_W    = (D.f[DIR_P00])[ke   ];
+      //f1_E    = (D.f[DIR_M00])[kw   ];
+      //f1_S    = (D.f[DIR_0P0])[kn   ];
+      //f1_N    = (D.f[DIR_0M0])[ks   ];
+      //f1_B    = (D.f[DIR_00P])[kt   ];
+      //f1_T    = (D.f[DIR_00M])[kb   ];
+      //f1_SW   = (D.f[DIR_PP0])[kne  ];
+      //f1_NE   = (D.f[DIR_MM0])[ksw  ];
+      //f1_NW   = (D.f[DIR_PM0])[kse  ];
+      //f1_SE   = (D.f[DIR_MP0])[knw  ];
+      //f1_BW   = (D.f[DIR_P0P])[kte  ];
+      //f1_TE   = (D.f[DIR_M0M])[kbw  ];
+      //f1_TW   = (D.f[DIR_P0M])[kbe  ];
+      //f1_BE   = (D.f[DIR_M0P])[ktw  ];
+      //f1_BS   = (D.f[DIR_0PP])[ktn  ];
+      //f1_TN   = (D.f[DIR_0MM])[kbs  ];
+      //f1_TS   = (D.f[DIR_0PM])[kbn  ];
+      //f1_BN   = (D.f[DIR_0MP])[kts  ];
       //f1_ZERO = (D.f[DIR_000])[kzero];
-      //f1_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //f1_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //f1_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //f1_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //f1_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //f1_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //f1_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //f1_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //f1_BSW  = (D.f[DIR_PPP])[ktne ];
+      //f1_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //f1_BNW  = (D.f[DIR_PMP])[ktse ];
+      //f1_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //f1_TSW  = (D.f[DIR_PPM])[kbne ];
+      //f1_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //f1_TNW  = (D.f[DIR_PMM])[kbse ];
+      //f1_TSE  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       real cs       = c1o1/sqrt(c3o1);
       real csp1     = cs + c1o1;
@@ -522,64 +522,64 @@ __global__ void VelSchlaff27(  int t,
       f1_TNW = f1_BSE - c1o36 * (VX - VY - VZ);
 
       deltaVz0[k] = tempDeltaV;
-      (D.f[DIR_00P   ])[kt   ] = f1_T  ;
-      (D.f[DIR_P0P  ])[kte  ] = f1_TE ;
-      (D.f[DIR_M0P  ])[ktw  ] = f1_TW ;
-      (D.f[DIR_0PP  ])[ktn  ] = f1_TN ;
-      (D.f[DIR_0MP  ])[kts  ] = f1_TS ;
-      (D.f[DIR_PPP ])[ktne ] = f1_TNE;
-      (D.f[DIR_MMP ])[ktsw ] = f1_TSW;
-      (D.f[DIR_PMP ])[ktse ] = f1_TSE;
-      (D.f[DIR_MPP ])[ktnw ] = f1_TNW;
-
-      //(D.f[DIR_00M   ])[kb   ] = f1_B   ;
-      //(D.f[DIR_M0M  ])[kbw  ] = f1_BW  ;
-      //(D.f[DIR_P0M  ])[kbe  ] = f1_BE  ;
-      //(D.f[DIR_0MM  ])[kbs  ] = f1_BS  ;
-      //(D.f[DIR_0PM  ])[kbn  ] = f1_BN  ;
-      //(D.f[DIR_PPM ])[kbne ] = f1_BNE ;
-      //(D.f[DIR_MMM ])[kbsw ] = f1_BSW ;
-      //(D.f[DIR_PMM ])[kbse ] = f1_BSE ;
-      //(D.f[DIR_MPM ])[kbnw ] = f1_BNW ;
-
-
-      //(D.f[DIR_00P   ])[kt   ] = f1_B  ;
-      //(D.f[DIR_P0P  ])[kte  ] = f1_BW ;
-      //(D.f[DIR_M0P  ])[ktw  ] = f1_BE ;
-      //(D.f[DIR_0PP  ])[ktn  ] = f1_BS ;
-      //(D.f[DIR_0MP  ])[kts  ] = f1_BN ;
-      //(D.f[DIR_PPP ])[ktne ] = f1_BSW;
-      //(D.f[DIR_MMP ])[ktsw ] = f1_BNE;
-      //(D.f[DIR_PMP ])[ktse ] = f1_BNW;
-      //(D.f[DIR_MPP ])[ktnw ] = f1_BSE;
-
-      //(D.f[DIR_P00   ])[ke   ] = f1_W   -c2over27*drho1;
-      //(D.f[DIR_M00   ])[kw   ] = f1_E   -c2over27*drho1;
-      //(D.f[DIR_0P0   ])[kn   ] = f1_S   -c2over27*drho1;
-      //(D.f[DIR_0M0   ])[ks   ] = f1_N   -c2over27*drho1;
-      //(D.f[DIR_00P   ])[kt   ] = f1_B   -c2over27*drho1;
-      //(D.f[DIR_00M   ])[kb   ] = f1_T   -c2over27*drho1;
-      //(D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1over54*drho1;
-      //(D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1over54*drho1;
-      //(D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1over54*drho1;
-      //(D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1over54*drho1;
-      //(D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1over54*drho1;
-      //(D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1over54*drho1;
-      //(D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1over54*drho1;
-      //(D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1over54*drho1;
-      //(D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1over54*drho1;
-      //(D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1over54*drho1;
-      //(D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1over54*drho1;
-      //(D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1over54*drho1;
+      (D.f[DIR_00P])[kt   ] = f1_T  ;
+      (D.f[DIR_P0P])[kte  ] = f1_TE ;
+      (D.f[DIR_M0P])[ktw  ] = f1_TW ;
+      (D.f[DIR_0PP])[ktn  ] = f1_TN ;
+      (D.f[DIR_0MP])[kts  ] = f1_TS ;
+      (D.f[DIR_PPP])[ktne ] = f1_TNE;
+      (D.f[DIR_MMP])[ktsw ] = f1_TSW;
+      (D.f[DIR_PMP])[ktse ] = f1_TSE;
+      (D.f[DIR_MPP])[ktnw ] = f1_TNW;
+
+      //(D.f[DIR_00M])[kb   ] = f1_B   ;
+      //(D.f[DIR_M0M])[kbw  ] = f1_BW  ;
+      //(D.f[DIR_P0M])[kbe  ] = f1_BE  ;
+      //(D.f[DIR_0MM])[kbs  ] = f1_BS  ;
+      //(D.f[DIR_0PM])[kbn  ] = f1_BN  ;
+      //(D.f[DIR_PPM])[kbne ] = f1_BNE ;
+      //(D.f[DIR_MMM])[kbsw ] = f1_BSW ;
+      //(D.f[DIR_PMM])[kbse ] = f1_BSE ;
+      //(D.f[DIR_MPM])[kbnw ] = f1_BNW ;
+
+
+      //(D.f[DIR_00P])[kt   ] = f1_B  ;
+      //(D.f[DIR_P0P])[kte  ] = f1_BW ;
+      //(D.f[DIR_M0P])[ktw  ] = f1_BE ;
+      //(D.f[DIR_0PP])[ktn  ] = f1_BS ;
+      //(D.f[DIR_0MP])[kts  ] = f1_BN ;
+      //(D.f[DIR_PPP])[ktne ] = f1_BSW;
+      //(D.f[DIR_MMP])[ktsw ] = f1_BNE;
+      //(D.f[DIR_PMP])[ktse ] = f1_BNW;
+      //(D.f[DIR_MPP])[ktnw ] = f1_BSE;
+
+      //(D.f[DIR_P00])[ke   ] = f1_W   -c2over27*drho1;
+      //(D.f[DIR_M00])[kw   ] = f1_E   -c2over27*drho1;
+      //(D.f[DIR_0P0])[kn   ] = f1_S   -c2over27*drho1;
+      //(D.f[DIR_0M0])[ks   ] = f1_N   -c2over27*drho1;
+      //(D.f[DIR_00P])[kt   ] = f1_B   -c2over27*drho1;
+      //(D.f[DIR_00M])[kb   ] = f1_T   -c2over27*drho1;
+      //(D.f[DIR_PP0])[kne  ] = f1_SW  -c1over54*drho1;
+      //(D.f[DIR_MM0])[ksw  ] = f1_NE  -c1over54*drho1;
+      //(D.f[DIR_PM0])[kse  ] = f1_NW  -c1over54*drho1;
+      //(D.f[DIR_MP0])[knw  ] = f1_SE  -c1over54*drho1;
+      //(D.f[DIR_P0P])[kte  ] = f1_BW  -c1over54*drho1;
+      //(D.f[DIR_M0M])[kbw  ] = f1_TE  -c1over54*drho1;
+      //(D.f[DIR_P0M])[kbe  ] = f1_TW  -c1over54*drho1;
+      //(D.f[DIR_M0P])[ktw  ] = f1_BE  -c1over54*drho1;
+      //(D.f[DIR_0PP])[ktn  ] = f1_BS  -c1over54*drho1;
+      //(D.f[DIR_0MM])[kbs  ] = f1_TN  -c1over54*drho1;
+      //(D.f[DIR_0PM])[kbn  ] = f1_TS  -c1over54*drho1;
+      //(D.f[DIR_0MP])[kts  ] = f1_BN  -c1over54*drho1;
       //(D.f[DIR_000])[kzero] = f1_ZERO-c8over27*drho1;
-      //(D.f[DIR_PPP ])[ktne ] = f1_BSW -c1over216*drho1;
-      //(D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1over216*drho1;
-      //(D.f[DIR_PMP ])[ktse ] = f1_BNW -c1over216*drho1;
-      //(D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1over216*drho1;
-      //(D.f[DIR_PPM ])[kbne ] = f1_TSW -c1over216*drho1;
-      //(D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1over216*drho1;
-      //(D.f[DIR_PMM ])[kbse ] = f1_TNW -c1over216*drho1;
-      //(D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1over216*drho1;
+      //(D.f[DIR_PPP])[ktne ] = f1_BSW -c1over216*drho1;
+      //(D.f[DIR_MMP])[ktsw ] = f1_BNE -c1over216*drho1;
+      //(D.f[DIR_PMP])[ktse ] = f1_BNW -c1over216*drho1;
+      //(D.f[DIR_MPP])[ktnw ] = f1_BSE -c1over216*drho1;
+      //(D.f[DIR_PPM])[kbne ] = f1_TSW -c1over216*drho1;
+      //(D.f[DIR_MMM])[kbsw ] = f1_TNE -c1over216*drho1;
+      //(D.f[DIR_PMM])[kbse ] = f1_TNW -c1over216*drho1;
+      //(D.f[DIR_MPM])[kbnw ] = f1_TSE -c1over216*drho1;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
index 8dbf2c670a549f9a6afe581510205c31246b50cb..07fc5853eb7042d5567c38a03cb27418142bf642 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
@@ -16,69 +16,69 @@ __global__ void GetVeloforForcing27( real* DD,
 												unsigned int* neighborX,
 												unsigned int* neighborY,
 												unsigned int* neighborZ,
-												unsigned int size_Mat, 
+												unsigned long long numberOfLBnodes, 
 												bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==false)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -123,33 +123,33 @@ __global__ void GetVeloforForcing27( real* DD,
 		unsigned int ktne = KQK;
 		unsigned int kbsw = neighborZ[ksw];
 		////////////////////////////////////////////////////////////////////////////////
-		real mfcbb = (D.f[DIR_P00   ])[ke   ];
-		real mfabb = (D.f[DIR_M00   ])[kw   ];
-		real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-		real mfbab = (D.f[DIR_0M0   ])[ks   ];
-		real mfbbc = (D.f[DIR_00P   ])[kt   ];
-		real mfbba = (D.f[DIR_00M   ])[kb   ];
-		real mfccb = (D.f[DIR_PP0  ])[kne  ];
-		real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-		real mfcab = (D.f[DIR_PM0  ])[kse  ];
-		real mfacb = (D.f[DIR_MP0  ])[knw  ];
-		real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-		real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-		real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-		real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-		real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-		real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-		real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-		real mfbac = (D.f[DIR_0MP  ])[kts  ];
+		real mfcbb = (D.f[DIR_P00])[ke   ];
+		real mfabb = (D.f[DIR_M00])[kw   ];
+		real mfbcb = (D.f[DIR_0P0])[kn   ];
+		real mfbab = (D.f[DIR_0M0])[ks   ];
+		real mfbbc = (D.f[DIR_00P])[kt   ];
+		real mfbba = (D.f[DIR_00M])[kb   ];
+		real mfccb = (D.f[DIR_PP0])[kne  ];
+		real mfaab = (D.f[DIR_MM0])[ksw  ];
+		real mfcab = (D.f[DIR_PM0])[kse  ];
+		real mfacb = (D.f[DIR_MP0])[knw  ];
+		real mfcbc = (D.f[DIR_P0P])[kte  ];
+		real mfaba = (D.f[DIR_M0M])[kbw  ];
+		real mfcba = (D.f[DIR_P0M])[kbe  ];
+		real mfabc = (D.f[DIR_M0P])[ktw  ];
+		real mfbcc = (D.f[DIR_0PP])[ktn  ];
+		real mfbaa = (D.f[DIR_0MM])[kbs  ];
+		real mfbca = (D.f[DIR_0PM])[kbn  ];
+		real mfbac = (D.f[DIR_0MP])[kts  ];
 		real mfbbb = (D.f[DIR_000])[kzero];
-		real mfccc = (D.f[DIR_PPP ])[ktne ];
-		real mfaac = (D.f[DIR_MMP ])[ktsw ];
-		real mfcac = (D.f[DIR_PMP ])[ktse ];
-		real mfacc = (D.f[DIR_MPP ])[ktnw ];
-		real mfcca = (D.f[DIR_PPM ])[kbne ];
-		real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-		real mfcaa = (D.f[DIR_PMM ])[kbse ];
-		real mfaca = (D.f[DIR_MPM ])[kbnw ];
+		real mfccc = (D.f[DIR_PPP])[ktne ];
+		real mfaac = (D.f[DIR_MMP])[ktsw ];
+		real mfcac = (D.f[DIR_PMP])[ktse ];
+		real mfacc = (D.f[DIR_MPP])[ktnw ];
+		real mfcca = (D.f[DIR_PPM])[kbne ];
+		real mfaaa = (D.f[DIR_MMM])[kbsw ];
+		real mfcaa = (D.f[DIR_PMM])[kbse ];
+		real mfaca = (D.f[DIR_MPM])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////////
 		real rho   = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 					 	 mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
index 0079c927373e90c1e408d2c57ace0595bcfdff15..cc8ca53d15ac02686b850a70ab181bb47285a7d1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
@@ -1,84 +1,117 @@
-/* Device code */
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SlipBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
-#include "KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QSlipDevice27(real* DD, 
-                                         int* k_Q, 
-                                         real* QQ,
-                                         unsigned int numberOfBCnodes,
-                                         real om1, 
-                                         unsigned int* neighborX,
-                                         unsigned int* neighborY,
-                                         unsigned int* neighborZ,
-                                         unsigned int size_Mat, 
-                                         bool isEvenTimestep)
+__global__ void QSlipDevice27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -99,24 +132,24 @@ __global__ void QSlipDevice27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -156,32 +189,32 @@ __global__ void QSlipDevice27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -206,63 +239,63 @@ __global__ void QSlipDevice27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -659,32 +692,26 @@ __global__ void QSlipDevice27(real* DD,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QSlipDeviceComp27(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
+
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned k = nx*(ny*z + y) + x;
-
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -702,7 +729,7 @@ __global__ void QSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -734,32 +761,32 @@ __global__ void QSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -804,7 +831,7 @@ __global__ void QSlipDeviceComp27(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -816,7 +843,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -828,7 +855,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -840,7 +867,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -852,7 +879,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -864,7 +891,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -876,7 +903,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -890,7 +917,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -904,7 +931,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -918,7 +945,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -932,7 +959,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -946,7 +973,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -955,12 +982,12 @@ __global__ void QSlipDeviceComp27(
         if (z == true) VeloZ = c0o1;
 
          velocityLB = -vx1 - vx3;
-         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX - VeloZ;
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -974,7 +1001,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -988,7 +1015,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1002,7 +1029,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1017,7 +1044,7 @@ __global__ void QSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1031,7 +1058,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1045,7 +1072,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1060,7 +1087,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1076,7 +1103,7 @@ __global__ void QSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1091,7 +1118,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1106,7 +1133,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1121,7 +1148,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1136,7 +1163,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1151,7 +1178,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1169,34 +1196,53 @@ __global__ void QSlipDeviceComp27(
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 //////////////////////////////////////////////////////////////////////////////
 __global__ void BBSlipDeviceComp27(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
+
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -1214,7 +1260,7 @@ __global__ void BBSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -1246,32 +1292,32 @@ __global__ void BBSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -1316,7 +1362,7 @@ __global__ void BBSlipDeviceComp27(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -1326,7 +1372,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_M00])[kw] = getBounceBackDistributionForVeloBC(f_W, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -1336,7 +1382,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_P00])[ke] = getBounceBackDistributionForVeloBC(f_E, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1346,7 +1392,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getBounceBackDistributionForVeloBC(f_S, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1356,7 +1402,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getBounceBackDistributionForVeloBC(f_N, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1366,7 +1412,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_00M])[kb] = getBounceBackDistributionForVeloBC(f_B, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1376,7 +1422,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_00P])[kt] = getBounceBackDistributionForVeloBC(f_T, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1388,7 +1434,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getBounceBackDistributionForVeloBC(f_SW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1400,7 +1446,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getBounceBackDistributionForVeloBC(f_NE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1412,7 +1458,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getBounceBackDistributionForVeloBC(f_NW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1424,7 +1470,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getBounceBackDistributionForVeloBC(f_SE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1436,7 +1482,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getBounceBackDistributionForVeloBC(f_BW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -1444,11 +1490,11 @@ __global__ void BBSlipDeviceComp27(
         if (x == true) VeloX = c0o1;
         if (z == true) VeloZ = c0o1;
 
-         velocityBC = -VeloX - VeloZ;
-         (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54);
+        velocityBC = -VeloX - VeloZ;
+        (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1460,7 +1506,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getBounceBackDistributionForVeloBC(f_TW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1472,7 +1518,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getBounceBackDistributionForVeloBC(f_BE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1484,7 +1530,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getBounceBackDistributionForVeloBC(f_BS, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1497,7 +1543,7 @@ __global__ void BBSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1509,7 +1555,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getBounceBackDistributionForVeloBC(f_TS, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1521,7 +1567,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getBounceBackDistributionForVeloBC(f_BN, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1535,7 +1581,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getBounceBackDistributionForVeloBC(f_TNE, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1550,7 +1596,7 @@ __global__ void BBSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1564,7 +1610,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getBounceBackDistributionForVeloBC(f_TSW, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1578,7 +1624,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getBounceBackDistributionForVeloBC(f_BNE, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1592,7 +1638,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getBounceBackDistributionForVeloBC(f_BNW, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1606,7 +1652,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getBounceBackDistributionForVeloBC(f_TSE, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1620,7 +1666,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getBounceBackDistributionForVeloBC(f_TNW, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1638,35 +1684,55 @@ __global__ void BBSlipDeviceComp27(
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 ////////////////////////////////////////////////////////////////////////////
 __global__ void QSlipDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* turbViscosity,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
+
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned k = nx*(ny*z + y) + x;
-
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -1684,7 +1750,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -1716,32 +1782,32 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -1791,7 +1857,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -1803,7 +1869,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -1815,7 +1881,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1827,7 +1893,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1839,7 +1905,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1851,7 +1917,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1863,7 +1929,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1877,7 +1943,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1891,7 +1957,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1905,7 +1971,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1919,7 +1985,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1933,7 +1999,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -1941,13 +2007,13 @@ __global__ void QSlipDeviceComp27TurbViscosity(
         if (x == true) VeloX = c0o1;
         if (z == true) VeloZ = c0o1;
 
-         velocityLB = -vx1 - vx3;
-         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         velocityBC = -VeloX - VeloZ;
-         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54);
+        velocityLB = -vx1 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX - VeloZ;
+        (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1961,7 +2027,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1975,7 +2041,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1989,7 +2055,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2004,7 +2070,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2018,7 +2084,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2032,7 +2098,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2047,7 +2113,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2063,7 +2129,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2078,7 +2144,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2093,7 +2159,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2108,7 +2174,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2123,7 +2189,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2138,7 +2204,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2154,37 +2220,59 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       }
    }
 }
+////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 ////////////////////////////////////////////////////////////////////////////
 __global__ void QSlipPressureDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* turbViscosity,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -2202,7 +2290,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -2234,32 +2322,32 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -2309,7 +2397,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -2321,7 +2409,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -2333,7 +2421,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -2345,7 +2433,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -2357,7 +2445,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -2369,7 +2457,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -2381,7 +2469,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2395,7 +2483,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2409,7 +2497,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2423,7 +2511,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2437,7 +2525,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2451,7 +2539,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -2459,13 +2547,13 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
         if (x == true) VeloX = c0o1;
         if (z == true) VeloZ = c0o1;
 
-         velocityLB = -vx1 - vx3;
-         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         velocityBC = -VeloX - VeloZ;
-         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54);
+        velocityLB = -vx1 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX - VeloZ;
+        (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2479,7 +2567,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2493,7 +2581,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2507,7 +2595,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2522,7 +2610,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2536,7 +2624,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2550,7 +2638,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2565,7 +2653,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2581,7 +2669,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2596,7 +2684,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2611,7 +2699,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2626,7 +2714,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2641,7 +2729,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2656,7 +2744,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2688,63 +2776,63 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //    Distributions27 D;
 //    if (isEvenTimestep==true)
 //    {
-//       D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-//       D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-//       D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-//       D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-//       D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-//       D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-//       D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-//       D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-//       D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-//       D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-//       D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-//       D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-//       D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-//       D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-//       D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-//       D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-//       D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-//       D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-//       D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//       D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-//       D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-//       D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-//       D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-//       D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-//       D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-//       D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-//       D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+//       D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+//       D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+//       D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+//       D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+//       D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+//       D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+//       D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+//       D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+//       D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+//       D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+//       D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+//       D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+//       D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+//       D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+//       D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+//       D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+//       D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+//       D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+//       D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//       D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+//       D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+//       D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+//       D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+//       D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+//       D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+//       D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+//       D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
 //    } 
 //    else
 //    {
-//       D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-//       D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-//       D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-//       D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-//       D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-//       D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-//       D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-//       D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-//       D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-//       D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-//       D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-//       D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-//       D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-//       D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-//       D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-//       D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-//       D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-//       D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-//       D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//       D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-//       D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-//       D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-//       D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-//       D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-//       D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-//       D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-//       D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+//       D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+//       D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+//       D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+//       D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+//       D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+//       D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+//       D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+//       D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+//       D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+//       D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+//       D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+//       D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+//       D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+//       D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+//       D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+//       D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+//       D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+//       D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+//       D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//       D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+//       D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+//       D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+//       D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+//       D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+//       D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+//       D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+//       D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
 //    }
 //    ////////////////////////////////////////////////////////////////////////////////
 //    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2765,24 +2853,24 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 //             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 //             *q_dirBSE, *q_dirBNW; 
-//       q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-//       q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-//       q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-//       q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-//       q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-//       q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-//       q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-//       q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-//       q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-//       q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-//       q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-//       q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-//       q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-//       q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-//       q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-//       q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-//       q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-//       q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+//       q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+//       q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+//       q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+//       q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+//       q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+//       q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+//       q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+//       q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+//       q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+//       q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+//       q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+//       q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+//       q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+//       q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+//       q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+//       q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+//       q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+//       q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 //       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 //       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 //       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2823,32 +2911,32 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //       unsigned int kbsw = neighborZ[ksw];
       
 //       ////////////////////////////////////////////////////////////////////////////////
-//       real f_W    = (D.f[DIR_P00   ])[ke   ];
-//       real f_E    = (D.f[DIR_M00   ])[kw   ];
-//       real f_S    = (D.f[DIR_0P0   ])[kn   ];
-//       real f_N    = (D.f[DIR_0M0   ])[ks   ];
-//       real f_B    = (D.f[DIR_00P   ])[kt   ];
-//       real f_T    = (D.f[DIR_00M   ])[kb   ];
-//       real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-//       real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-//       real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-//       real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-//       real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-//       real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-//       real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-//       real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-//       real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-//       real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-//       real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-//       real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-//       real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-//       real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-//       real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-//       real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-//       real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-//       real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-//       real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-//       real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+//       real f_W    = (D.f[DIR_P00])[ke   ];
+//       real f_E    = (D.f[DIR_M00])[kw   ];
+//       real f_S    = (D.f[DIR_0P0])[kn   ];
+//       real f_N    = (D.f[DIR_0M0])[ks   ];
+//       real f_B    = (D.f[DIR_00P])[kt   ];
+//       real f_T    = (D.f[DIR_00M])[kb   ];
+//       real f_SW   = (D.f[DIR_PP0])[kne  ];
+//       real f_NE   = (D.f[DIR_MM0])[ksw  ];
+//       real f_NW   = (D.f[DIR_PM0])[kse  ];
+//       real f_SE   = (D.f[DIR_MP0])[knw  ];
+//       real f_BW   = (D.f[DIR_P0P])[kte  ];
+//       real f_TE   = (D.f[DIR_M0M])[kbw  ];
+//       real f_TW   = (D.f[DIR_P0M])[kbe  ];
+//       real f_BE   = (D.f[DIR_M0P])[ktw  ];
+//       real f_BS   = (D.f[DIR_0PP])[ktn  ];
+//       real f_TN   = (D.f[DIR_0MM])[kbs  ];
+//       real f_TS   = (D.f[DIR_0PM])[kbn  ];
+//       real f_BN   = (D.f[DIR_0MP])[kts  ];
+//       real f_BSW  = (D.f[DIR_PPP])[ktne ];
+//       real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+//       real f_BNW  = (D.f[DIR_PMP])[ktse ];
+//       real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+//       real f_TSW  = (D.f[DIR_PPM])[kbne ];
+//       real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+//       real f_TNW  = (D.f[DIR_PMM])[kbse ];
+//       real f_TSE  = (D.f[DIR_MPM])[kbnw ];
 //       ////////////////////////////////////////////////////////////////////////////////
 //       real vx1, vx2, vx3, drho, feq, q;
 //       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -2873,63 +2961,63 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //       //////////////////////////////////////////////////////////////////////////
 //       if (isEvenTimestep==false)
 //       {
-//          D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-//          D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-//          D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-//          D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-//          D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-//          D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-//          D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-//          D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-//          D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-//          D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-//          D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-//          D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-//          D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-//          D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-//          D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-//          D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-//          D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-//          D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-//          D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//          D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-//          D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-//          D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-//          D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-//          D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-//          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-//          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-//          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+//          D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+//          D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+//          D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+//          D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+//          D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+//          D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+//          D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+//          D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+//          D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+//          D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+//          D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+//          D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+//          D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+//          D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+//          D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+//          D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+//          D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+//          D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+//          D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//          D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+//          D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+//          D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+//          D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+//          D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+//          D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+//          D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+//          D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
 //       } 
 //       else
 //       {
-//          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-//          D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-//          D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-//          D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-//          D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-//          D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-//          D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-//          D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-//          D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-//          D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-//          D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-//          D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-//          D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-//          D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-//          D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-//          D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-//          D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-//          D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-//          D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//          D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-//          D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-//          D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-//          D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-//          D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-//          D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-//          D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-//          D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+//          D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+//          D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+//          D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+//          D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+//          D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+//          D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+//          D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+//          D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+//          D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+//          D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+//          D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+//          D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+//          D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+//          D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+//          D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+//          D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+//          D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+//          D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+//          D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//          D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+//          D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+//          D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+//          D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+//          D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+//          D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+//          D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+//          D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
 //       }
 //       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //       //Test
@@ -3378,80 +3466,81 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QSlipGeomDeviceComp27(real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int  numberOfBCnodes,
-												 real om1, 
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QSlipGeomDeviceComp27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int  numberOfBCnodes,
+    real om1, 
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3472,24 +3561,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3504,24 +3593,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
-      nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
-      nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
-      nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
-      nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
-      nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
-      nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
-      nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
-      nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
-      nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
-      nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
-      nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
-      nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
-      nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
-      nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
-      nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
-      nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
-      nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+      nx_dirE   = &NormalX[DIR_P00 * numberOfBCnodes];
+      nx_dirW   = &NormalX[DIR_M00 * numberOfBCnodes];
+      nx_dirN   = &NormalX[DIR_0P0 * numberOfBCnodes];
+      nx_dirS   = &NormalX[DIR_0M0 * numberOfBCnodes];
+      nx_dirT   = &NormalX[DIR_00P * numberOfBCnodes];
+      nx_dirB   = &NormalX[DIR_00M * numberOfBCnodes];
+      nx_dirNE  = &NormalX[DIR_PP0 * numberOfBCnodes];
+      nx_dirSW  = &NormalX[DIR_MM0 * numberOfBCnodes];
+      nx_dirSE  = &NormalX[DIR_PM0 * numberOfBCnodes];
+      nx_dirNW  = &NormalX[DIR_MP0 * numberOfBCnodes];
+      nx_dirTE  = &NormalX[DIR_P0P * numberOfBCnodes];
+      nx_dirBW  = &NormalX[DIR_M0M * numberOfBCnodes];
+      nx_dirBE  = &NormalX[DIR_P0M * numberOfBCnodes];
+      nx_dirTW  = &NormalX[DIR_M0P * numberOfBCnodes];
+      nx_dirTN  = &NormalX[DIR_0PP * numberOfBCnodes];
+      nx_dirBS  = &NormalX[DIR_0MM * numberOfBCnodes];
+      nx_dirBN  = &NormalX[DIR_0PM * numberOfBCnodes];
+      nx_dirTS  = &NormalX[DIR_0MP * numberOfBCnodes];
       nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
       nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
       nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
@@ -3536,24 +3625,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
-      ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
-      ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
-      ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
-      ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
-      ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
-      ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
-      ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
-      ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
-      ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
-      ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
-      ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
-      ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
-      ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
-      ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
-      ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
-      ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
-      ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+      ny_dirE   = &NormalY[DIR_P00 * numberOfBCnodes];
+      ny_dirW   = &NormalY[DIR_M00 * numberOfBCnodes];
+      ny_dirN   = &NormalY[DIR_0P0 * numberOfBCnodes];
+      ny_dirS   = &NormalY[DIR_0M0 * numberOfBCnodes];
+      ny_dirT   = &NormalY[DIR_00P * numberOfBCnodes];
+      ny_dirB   = &NormalY[DIR_00M * numberOfBCnodes];
+      ny_dirNE  = &NormalY[DIR_PP0 * numberOfBCnodes];
+      ny_dirSW  = &NormalY[DIR_MM0 * numberOfBCnodes];
+      ny_dirSE  = &NormalY[DIR_PM0 * numberOfBCnodes];
+      ny_dirNW  = &NormalY[DIR_MP0 * numberOfBCnodes];
+      ny_dirTE  = &NormalY[DIR_P0P * numberOfBCnodes];
+      ny_dirBW  = &NormalY[DIR_M0M * numberOfBCnodes];
+      ny_dirBE  = &NormalY[DIR_P0M * numberOfBCnodes];
+      ny_dirTW  = &NormalY[DIR_M0P * numberOfBCnodes];
+      ny_dirTN  = &NormalY[DIR_0PP * numberOfBCnodes];
+      ny_dirBS  = &NormalY[DIR_0MM * numberOfBCnodes];
+      ny_dirBN  = &NormalY[DIR_0PM * numberOfBCnodes];
+      ny_dirTS  = &NormalY[DIR_0MP * numberOfBCnodes];
       ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
       ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
       ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
@@ -3568,24 +3657,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
-      nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
-      nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
-      nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
-      nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
-      nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
-      nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
-      nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
-      nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
-      nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
-      nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
-      nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
-      nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
-      nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
-      nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
-      nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
-      nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
-      nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+      nz_dirE   = &NormalZ[DIR_P00 * numberOfBCnodes];
+      nz_dirW   = &NormalZ[DIR_M00 * numberOfBCnodes];
+      nz_dirN   = &NormalZ[DIR_0P0 * numberOfBCnodes];
+      nz_dirS   = &NormalZ[DIR_0M0 * numberOfBCnodes];
+      nz_dirT   = &NormalZ[DIR_00P * numberOfBCnodes];
+      nz_dirB   = &NormalZ[DIR_00M * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[DIR_PP0 * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[DIR_MM0 * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[DIR_PM0 * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[DIR_MP0 * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[DIR_P0P * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[DIR_M0M * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[DIR_P0M * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[DIR_M0P * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[DIR_0PP * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[DIR_0MM * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[DIR_0PM * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[DIR_0MP * numberOfBCnodes];
       nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
       nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
       nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
@@ -3625,32 +3714,32 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -3675,63 +3764,63 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real VeloX = vx1;
@@ -4264,80 +4353,81 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QSlipNormDeviceComp27(real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int  numberOfBCnodes,
-												 real om1, 
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QSlipNormDeviceComp27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int  numberOfBCnodes,
+    real om1, 
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4358,24 +4448,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -4390,24 +4480,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
-      nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
-      nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
-      nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
-      nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
-      nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
-      nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
-      nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
-      nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
-      nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
-      nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
-      nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
-      nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
-      nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
-      nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
-      nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
-      nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
-      nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+      nx_dirE   = &NormalX[DIR_P00 * numberOfBCnodes];
+      nx_dirW   = &NormalX[DIR_M00 * numberOfBCnodes];
+      nx_dirN   = &NormalX[DIR_0P0 * numberOfBCnodes];
+      nx_dirS   = &NormalX[DIR_0M0 * numberOfBCnodes];
+      nx_dirT   = &NormalX[DIR_00P * numberOfBCnodes];
+      nx_dirB   = &NormalX[DIR_00M * numberOfBCnodes];
+      nx_dirNE  = &NormalX[DIR_PP0 * numberOfBCnodes];
+      nx_dirSW  = &NormalX[DIR_MM0 * numberOfBCnodes];
+      nx_dirSE  = &NormalX[DIR_PM0 * numberOfBCnodes];
+      nx_dirNW  = &NormalX[DIR_MP0 * numberOfBCnodes];
+      nx_dirTE  = &NormalX[DIR_P0P * numberOfBCnodes];
+      nx_dirBW  = &NormalX[DIR_M0M * numberOfBCnodes];
+      nx_dirBE  = &NormalX[DIR_P0M * numberOfBCnodes];
+      nx_dirTW  = &NormalX[DIR_M0P * numberOfBCnodes];
+      nx_dirTN  = &NormalX[DIR_0PP * numberOfBCnodes];
+      nx_dirBS  = &NormalX[DIR_0MM * numberOfBCnodes];
+      nx_dirBN  = &NormalX[DIR_0PM * numberOfBCnodes];
+      nx_dirTS  = &NormalX[DIR_0MP * numberOfBCnodes];
       nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
       nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
       nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
@@ -4422,24 +4512,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
-      ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
-      ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
-      ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
-      ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
-      ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
-      ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
-      ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
-      ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
-      ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
-      ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
-      ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
-      ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
-      ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
-      ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
-      ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
-      ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
-      ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+      ny_dirE   = &NormalY[DIR_P00 * numberOfBCnodes];
+      ny_dirW   = &NormalY[DIR_M00 * numberOfBCnodes];
+      ny_dirN   = &NormalY[DIR_0P0 * numberOfBCnodes];
+      ny_dirS   = &NormalY[DIR_0M0 * numberOfBCnodes];
+      ny_dirT   = &NormalY[DIR_00P * numberOfBCnodes];
+      ny_dirB   = &NormalY[DIR_00M * numberOfBCnodes];
+      ny_dirNE  = &NormalY[DIR_PP0 * numberOfBCnodes];
+      ny_dirSW  = &NormalY[DIR_MM0 * numberOfBCnodes];
+      ny_dirSE  = &NormalY[DIR_PM0 * numberOfBCnodes];
+      ny_dirNW  = &NormalY[DIR_MP0 * numberOfBCnodes];
+      ny_dirTE  = &NormalY[DIR_P0P * numberOfBCnodes];
+      ny_dirBW  = &NormalY[DIR_M0M * numberOfBCnodes];
+      ny_dirBE  = &NormalY[DIR_P0M * numberOfBCnodes];
+      ny_dirTW  = &NormalY[DIR_M0P * numberOfBCnodes];
+      ny_dirTN  = &NormalY[DIR_0PP * numberOfBCnodes];
+      ny_dirBS  = &NormalY[DIR_0MM * numberOfBCnodes];
+      ny_dirBN  = &NormalY[DIR_0PM * numberOfBCnodes];
+      ny_dirTS  = &NormalY[DIR_0MP * numberOfBCnodes];
       ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
       ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
       ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
@@ -4454,24 +4544,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
-      nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
-      nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
-      nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
-      nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
-      nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
-      nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
-      nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
-      nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
-      nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
-      nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
-      nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
-      nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
-      nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
-      nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
-      nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
-      nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
-      nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+      nz_dirE   = &NormalZ[DIR_P00 * numberOfBCnodes];
+      nz_dirW   = &NormalZ[DIR_M00 * numberOfBCnodes];
+      nz_dirN   = &NormalZ[DIR_0P0 * numberOfBCnodes];
+      nz_dirS   = &NormalZ[DIR_0M0 * numberOfBCnodes];
+      nz_dirT   = &NormalZ[DIR_00P * numberOfBCnodes];
+      nz_dirB   = &NormalZ[DIR_00M * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[DIR_PP0 * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[DIR_MM0 * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[DIR_PM0 * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[DIR_MP0 * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[DIR_P0P * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[DIR_M0M * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[DIR_P0M * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[DIR_M0P * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[DIR_0PP * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[DIR_0MM * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[DIR_0PM * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[DIR_0MP * numberOfBCnodes];
       nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
       nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
       nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
@@ -4511,32 +4601,32 @@ __global__ void QSlipNormDeviceComp27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -4561,63 +4651,63 @@ __global__ void QSlipNormDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real VeloX = vx1;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
index 74e2faa38638228aa5d499aa74226405ab109f7d..3208299e93940dabe52faa7d0b3c684c45596660 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
@@ -43,28 +43,30 @@
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
 #include <lbm/constants/NumericConstants.h>
-#include "KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
-__host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
-                                                         real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ,
-                                                         real* vx, real* vy, real* vz,
-                                                         real* vx_el,      real* vy_el,      real* vz_el,      //!>mean (temporally filtered) velocities at exchange location
-                                                         real* vx_w_mean,  real* vy_w_mean,  real* vz_w_mean,  //!>mean (temporally filtered) velocities at wall-adjactent node
-                                                         real  vx_w_inst,  real  vy_w_inst,  real  vz_w_inst,  //!>instantaneous velocities at wall-adjactent node
-                                                         real  rho,
-                                                         int* samplingOffset,
-                                                         real q,
-                                                         real forceFactor,                                     //!>e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015)
-                                                         real eps,                                             //!>filter constant in temporal averaging
-                                                         real* z0,                                             //!>aerodynamic roughness length
-                                                         bool  hasWallModelMonitor,
-                                                         real* u_star_monitor,
-                                                         real wallMomentumX, real wallMomentumY, real wallMomentumZ,
-                                                         real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ)
+__host__ __device__ __forceinline__ void iMEM(
+    uint k, uint kN,
+    real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ,
+    real* vx, real* vy, real* vz,
+    real* vx_el,      real* vy_el,      real* vz_el,      //!>mean (temporally filtered) velocities at exchange location
+    real* vx_w_mean,  real* vy_w_mean,  real* vz_w_mean,  //!>mean (temporally filtered) velocities at wall-adjacent node
+    real  vx_w_inst,  real  vy_w_inst,  real  vz_w_inst,  //!>instantaneous velocities at wall-adjacent node
+    real  rho,
+    int* samplingOffset,
+    real q,
+    real forceFactor,                                     //!>e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015)
+    real eps,                                             //!>filter constant in temporal averaging
+    real* z0,                                             //!>aerodynamic roughness length
+    bool  hasWallModelMonitor,
+    real* u_star_monitor,
+    real wallMomentumX, real wallMomentumY, real wallMomentumZ,
+    real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ)
 {
       real wallNormalX = _wallNormalX[k];
       real wallNormalY = _wallNormalY[k];
@@ -136,99 +138,100 @@ __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
 }
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QStressDeviceComp27(real* DD,
-											   int* k_Q,
-                                    int* k_N,
-											   real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    real* turbViscosity,
-                                    real* vx,
-                                    real* vy,
-                                    real* vz,
-                                    real* normalX,
-                                    real* normalY,
-                                    real* normalZ,
-                                    real* vx_el,
-                                    real* vy_el,
-                                    real* vz_el,
-                                    real* vx_w_mean,
-                                    real* vy_w_mean,
-                                    real* vz_w_mean,
-                                    int* samplingOffset,
-                                    real* z0,
-                                    bool  hasWallModelMonitor,
-                                    real* u_star_monitor,
-                                    real* Fx_monitor,
-                                    real* Fy_monitor,
-                                    real* Fz_monitor,
-											   unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
+__global__ void QStressDeviceComp27(
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    real* turbViscosity,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* normalX,
+    real* normalY,
+    real* normalZ,
+    real* vx_el,
+    real* vy_el,
+    real* vz_el,
+    real* vx_w_mean,
+    real* vy_w_mean,
+    real* vz_w_mean,
+    int* samplingOffset,
+    real* z0,
+    bool  hasWallModelMonitor,
+    real* u_star_monitor,
+    real* Fx_monitor,
+    real* Fy_monitor,
+    real* Fz_monitor,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
 
    Distributions27 D;
    if (isEvenTimestep==true)//get right array of post coll f's
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -249,24 +252,24 @@ __global__ void QStressDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -309,32 +312,32 @@ __global__ void QStressDeviceComp27(real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];     //post-coll f's
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];     //post-coll f's
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
@@ -361,63 +364,63 @@ __global__ void QStressDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)      //get adress where incoming f's should be written to
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Compute incoming f's with zero wall velocity
@@ -968,69 +971,69 @@ __global__ void BBStressDevice27( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -1051,24 +1054,24 @@ __global__ void BBStressDevice27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1112,32 +1115,32 @@ __global__ void BBStressDevice27( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho;
@@ -1161,63 +1164,63 @@ __global__ void BBStressDevice27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
@@ -1715,69 +1718,69 @@ __global__ void BBStressPressureDevice27( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -1798,24 +1801,24 @@ __global__ void BBStressPressureDevice27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1859,32 +1862,32 @@ __global__ void BBStressPressureDevice27( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho;
@@ -1908,63 +1911,63 @@ __global__ void BBStressPressureDevice27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
index 55f810628f370976289d1492e9916d5d3fa0dbb8..b96d961c9b92ae5d041beeb23482d7144e7a8acb 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
@@ -27,69 +27,69 @@ __global__ void QVelDeviceCompThinWallsPartOne27(
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
-	uint size_Mat, 
+	unsigned long long numberOfLBnodes, 
 	bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -114,24 +114,24 @@ __global__ void QVelDeviceCompThinWallsPartOne27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -174,32 +174,32 @@ __global__ void QVelDeviceCompThinWallsPartOne27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -456,69 +456,69 @@ __global__ void QDeviceCompThinWallsPartOne27(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep == true)
 	{
-		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -539,24 +539,24 @@ __global__ void QDeviceCompThinWallsPartOne27(
 			*q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW;
-		q_dirE = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -882,7 +882,7 @@ __global__ void QThinWallsPartTwo27(
 	uint* neighborY,
 	uint* neighborZ,
 	uint* neighborWSB,
-	uint size_Mat, 
+	unsigned long long numberOfLBnodes, 
 	bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -904,24 +904,24 @@ __global__ void QThinWallsPartTwo27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -980,123 +980,123 @@ __global__ void QThinWallsPartTwo27(
 	  Distributions27 D, DN;
 	  if (isEvenTimestep == true)
 	  {
-		  D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		  D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		  D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		  D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		  D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		  D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		  D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		  D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		  D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		  D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		  D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		  D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		  D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		  D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		  D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		  D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		  D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		  D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		  D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		  D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		  D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		  D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		  D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		  D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		  D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		  D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		  D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		  D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		  D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		  D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		  D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		  D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		  D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		  D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		  D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		  D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		  D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		  D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		  D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		  D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		  D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		  D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		  D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		  D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		  D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		  D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		  D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		  D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		  D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		  D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		  D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		  D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		  D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		  D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	  }
 	  else
 	  {
-		  D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		  D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		  D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		  D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		  D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		  D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		  D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		  D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		  D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		  D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		  D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		  D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		  D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		  D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		  D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		  D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		  D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		  D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		  D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		  D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		  D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		  D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		  D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		  D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		  D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		  D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		  D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		  D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		  D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		  D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		  D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		  D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		  D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		  D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		  D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		  D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		  D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		  D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		  D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		  D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		  D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		  D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		  D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		  D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		  D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		  D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		  D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		  D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		  D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		  D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		  D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		  D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		  D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		  D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	  }
 	  if (isEvenTimestep==false)
       {
-         DN.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         DN.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         DN.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         DN.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         DN.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         DN.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         DN.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         DN.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         DN.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         DN.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         DN.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         DN.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         DN.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         DN.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         DN.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         DN.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         DN.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         DN.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         DN.f[DIR_000] = &DD[DIR_000*size_Mat];
-         DN.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         DN.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         DN.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         DN.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         DN.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         DN.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         DN.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         DN.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         DN.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         DN.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         DN.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         DN.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         DN.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         DN.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         DN.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         DN.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         DN.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         DN.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         DN.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         DN.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         DN.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         DN.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         DN.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         DN.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         DN.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         DN.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         DN.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         DN.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         DN.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         DN.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         DN.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         DN.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         DN.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         DN.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         DN.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         DN.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         DN.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         DN.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         DN.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         DN.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         DN.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         DN.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         DN.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         DN.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         DN.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         DN.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         DN.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         DN.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         DN.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         DN.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         DN.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         DN.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         DN.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         DN.f[DIR_000] = &DD[DIR_000*size_Mat];
-         DN.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         DN.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         DN.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         DN.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         DN.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         DN.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         DN.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         DN.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         DN.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         DN.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         DN.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         DN.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         DN.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         DN.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         DN.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         DN.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         DN.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         DN.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         DN.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         DN.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         DN.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         DN.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         DN.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         DN.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         DN.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         DN.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         DN.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         DN.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         DN.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         DN.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         DN.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         DN.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         DN.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         DN.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         DN.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //directions allways exchange
@@ -1106,24 +1106,24 @@ __global__ void QThinWallsPartTwo27(
 	  //( 1  1  1) ( 1  0  0) ( 0  1  0) ( 0  0  1) ( 1  1  0) ( 1  0  1) ( 0  1  1) (-1 -1  1) (-1  1 -1) ( 1 -1 -1) (-1  1  0) (-1  0  1) ( 0 -1  1)
 	  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real q, tmp;
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kw  ] < GEO_FLUID){tmp = (DN.f[DIR_M00  ])[kw  ]; (DN.f[DIR_M00  ])[kw  ]=(D.f[DIR_M00  ])[kw  ]; (D.f[DIR_M00  ])[kw  ]=tmp;}}
-	  q = q_dirW[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P00  ])[ke  ]; (DN.f[DIR_P00  ])[ke  ]=(D.f[DIR_P00  ])[ke  ]; (D.f[DIR_P00  ])[ke  ]=tmp;}}
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1){ if (geom[ks  ] < GEO_FLUID){tmp = (DN.f[DIR_0M0  ])[ks  ]; (DN.f[DIR_0M0  ])[ks  ]=(D.f[DIR_0M0  ])[ks  ]; (D.f[DIR_0M0  ])[ks  ]=tmp;}}
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0P0  ])[kn  ]; (DN.f[DIR_0P0  ])[kn  ]=(D.f[DIR_0P0  ])[kn  ]; (D.f[DIR_0P0  ])[kn  ]=tmp;}}
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kb  ] < GEO_FLUID){tmp = (DN.f[DIR_00M  ])[kb  ]; (DN.f[DIR_00M  ])[kb  ]=(D.f[DIR_00M  ])[kb  ]; (D.f[DIR_00M  ])[kb  ]=tmp;}}
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_00P  ])[kt  ]; (DN.f[DIR_00P  ])[kt  ]=(D.f[DIR_00P  ])[kt  ]; (D.f[DIR_00P  ])[kt  ]=tmp;}}
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[DIR_MM0 ])[ksw ]; (DN.f[DIR_MM0 ])[ksw ]=(D.f[DIR_MM0 ])[ksw ]; (D.f[DIR_MM0 ])[ksw ]=tmp;}}
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PP0 ])[kne ]; (DN.f[DIR_PP0 ])[kne ]=(D.f[DIR_PP0 ])[kne ]; (D.f[DIR_PP0 ])[kne ]=tmp;}}
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MP0 ])[knw ]; (DN.f[DIR_MP0 ])[knw ]=(D.f[DIR_MP0 ])[knw ]; (D.f[DIR_MP0 ])[knw ]=tmp;}}
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[DIR_PM0 ])[kse ]; (DN.f[DIR_PM0 ])[kse ]=(D.f[DIR_PM0 ])[kse ]; (D.f[DIR_PM0 ])[kse ]=tmp;}}
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[DIR_M0M ])[kbw ]; (DN.f[DIR_M0M ])[kbw ]=(D.f[DIR_M0M ])[kbw ]; (D.f[DIR_M0M ])[kbw ]=tmp;}}
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P0P ])[kte ]; (DN.f[DIR_P0P ])[kte ]=(D.f[DIR_P0P ])[kte ]; (D.f[DIR_P0P ])[kte ]=tmp;}}
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_M0P ])[ktw ]; (DN.f[DIR_M0P ])[ktw ]=(D.f[DIR_M0P ])[ktw ]; (D.f[DIR_M0P ])[ktw ]=tmp;}}
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[km0p] < GEO_FLUID){tmp = (DN.f[DIR_P0M ])[kbe ]; (DN.f[DIR_P0M ])[kbe ]=(D.f[DIR_P0M ])[kbe ]; (D.f[DIR_P0M ])[kbe ]=tmp;}}
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[DIR_0MM ])[kbs ]; (DN.f[DIR_0MM ])[kbs ]=(D.f[DIR_0MM ])[kbs ]; (D.f[DIR_0MM ])[kbs ]=tmp;}}
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0PP ])[ktn ]; (DN.f[DIR_0PP ])[ktn ]=(D.f[DIR_0PP ])[ktn ]; (D.f[DIR_0PP ])[ktn ]=tmp;}}
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0MP ])[kts ]; (DN.f[DIR_0MP ])[kts ]=(D.f[DIR_0MP ])[kts ]; (D.f[DIR_0MP ])[kts ]=tmp;}}
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[DIR_0PM ])[kbn ]; (DN.f[DIR_0PM ])[kbn ]=(D.f[DIR_0PM ])[kbn ]; (D.f[DIR_0PM ])[kbn ]=tmp;}}
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kw  ] < GEO_FLUID){tmp = (DN.f[DIR_M00])[kw  ]; (DN.f[DIR_M00])[kw  ]=(D.f[DIR_M00])[kw  ]; (D.f[DIR_M00])[kw  ]=tmp;}}
+	  q = q_dirW[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P00])[ke  ]; (DN.f[DIR_P00])[ke  ]=(D.f[DIR_P00])[ke  ]; (D.f[DIR_P00])[ke  ]=tmp;}}
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1){ if (geom[ks  ] < GEO_FLUID){tmp = (DN.f[DIR_0M0])[ks  ]; (DN.f[DIR_0M0])[ks  ]=(D.f[DIR_0M0])[ks  ]; (D.f[DIR_0M0])[ks  ]=tmp;}}
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0P0])[kn  ]; (DN.f[DIR_0P0])[kn  ]=(D.f[DIR_0P0])[kn  ]; (D.f[DIR_0P0])[kn  ]=tmp;}}
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kb  ] < GEO_FLUID){tmp = (DN.f[DIR_00M])[kb  ]; (DN.f[DIR_00M])[kb  ]=(D.f[DIR_00M])[kb  ]; (D.f[DIR_00M])[kb  ]=tmp;}}
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_00P])[kt  ]; (DN.f[DIR_00P])[kt  ]=(D.f[DIR_00P])[kt  ]; (D.f[DIR_00P])[kt  ]=tmp;}}
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[DIR_MM0])[ksw ]; (DN.f[DIR_MM0])[ksw ]=(D.f[DIR_MM0])[ksw ]; (D.f[DIR_MM0])[ksw ]=tmp;}}
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PP0])[kne ]; (DN.f[DIR_PP0])[kne ]=(D.f[DIR_PP0])[kne ]; (D.f[DIR_PP0])[kne ]=tmp;}}
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MP0])[knw ]; (DN.f[DIR_MP0])[knw ]=(D.f[DIR_MP0])[knw ]; (D.f[DIR_MP0])[knw ]=tmp;}}
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[DIR_PM0])[kse ]; (DN.f[DIR_PM0])[kse ]=(D.f[DIR_PM0])[kse ]; (D.f[DIR_PM0])[kse ]=tmp;}}
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[DIR_M0M])[kbw ]; (DN.f[DIR_M0M])[kbw ]=(D.f[DIR_M0M])[kbw ]; (D.f[DIR_M0M])[kbw ]=tmp;}}
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P0P])[kte ]; (DN.f[DIR_P0P])[kte ]=(D.f[DIR_P0P])[kte ]; (D.f[DIR_P0P])[kte ]=tmp;}}
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_M0P])[ktw ]; (DN.f[DIR_M0P])[ktw ]=(D.f[DIR_M0P])[ktw ]; (D.f[DIR_M0P])[ktw ]=tmp;}}
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[km0p] < GEO_FLUID){tmp = (DN.f[DIR_P0M])[kbe ]; (DN.f[DIR_P0M])[kbe ]=(D.f[DIR_P0M])[kbe ]; (D.f[DIR_P0M])[kbe ]=tmp;}}
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[DIR_0MM])[kbs ]; (DN.f[DIR_0MM])[kbs ]=(D.f[DIR_0MM])[kbs ]; (D.f[DIR_0MM])[kbs ]=tmp;}}
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0PP])[ktn ]; (DN.f[DIR_0PP])[ktn ]=(D.f[DIR_0PP])[ktn ]; (D.f[DIR_0PP])[ktn ]=tmp;}}
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0MP])[kts ]; (DN.f[DIR_0MP])[kts ]=(D.f[DIR_0MP])[kts ]; (D.f[DIR_0MP])[kts ]=tmp;}}
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[DIR_0PM])[kbn ]; (DN.f[DIR_0PM])[kbn ]=(D.f[DIR_0PM])[kbn ]; (D.f[DIR_0PM])[kbn ]=tmp;}}
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbsw] < GEO_FLUID){tmp = (DN.f[DIR_MMM])[kbsw]; (DN.f[DIR_MMM])[kbsw]=(D.f[DIR_MMM])[kbsw]; (D.f[DIR_MMM])[kbsw]=tmp;}}
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PPP])[ktne]; (DN.f[DIR_PPP])[ktne]=(D.f[DIR_PPP])[ktne]; (D.f[DIR_PPP])[ktne]=tmp;}}
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MMP])[ktsw]; (DN.f[DIR_MMP])[ktsw]=(D.f[DIR_MMP])[ktsw]; (D.f[DIR_MMP])[ktsw]=tmp;}}
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
index f8cf8ab13c39d55477bf006cd27f7943dcb5b53a..3f440454ef272b13c24fe2a2882d67d32d32a841 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
@@ -9,14 +9,16 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include "lbm/constants/NumericConstants.h"
 
 #include "lbm/MacroscopicQuantities.h"
 #include "../Kernel/Utilities/DistributionHelper.cuh"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void CalcTurbulenceIntensity(
@@ -34,19 +36,21 @@ __global__ void CalcTurbulenceIntensity(
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
-   unsigned int size_Mat, 
+   unsigned long long numberOfLBnodes, 
    bool isEvenTimestep)
 {
-   const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-   if (k >= size_Mat)
+   if (nodeIndex >= numberOfLBnodes)
        return;
 
-   if (!vf::gpu::isValidFluidNode(typeOfGridNode[k]))
+   if (!isValidFluidNode(typeOfGridNode[nodeIndex]))
        return;
 
-   vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
-                                              neighborZ);
+   DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ);
    const auto &distribution = distr_wrapper.distribution;
 
    // analogue to LBCalcMacCompSP27
@@ -58,16 +62,16 @@ __global__ void CalcTurbulenceIntensity(
 
    // compute subtotals:
    // fluctuations
-   vxx[k] = vxx[k] + vx * vx;
-   vyy[k] = vyy[k] + vy * vy;
-   vzz[k] = vzz[k] + vz * vz;
-   vxy[k] = vxy[k] + vx * vy;
-   vxz[k] = vxz[k] + vx * vz;
-   vyz[k] = vyz[k] + vy * vz;
+   vxx[nodeIndex] = vxx[nodeIndex] + vx * vx;
+   vyy[nodeIndex] = vyy[nodeIndex] + vy * vy;
+   vzz[nodeIndex] = vzz[nodeIndex] + vz * vz;
+   vxy[nodeIndex] = vxy[nodeIndex] + vx * vy;
+   vxz[nodeIndex] = vxz[nodeIndex] + vx * vz;
+   vyz[nodeIndex] = vyz[nodeIndex] + vy * vz;
 
    // velocity (for mean velocity)
-   vx_mean[k] = vx_mean[k] + vx;
-   vy_mean[k] = vy_mean[k] + vy;
-   vz_mean[k] = vz_mean[k] + vz; 
+   vx_mean[nodeIndex] = vx_mean[nodeIndex] + vx;
+   vy_mean[nodeIndex] = vy_mean[nodeIndex] + vy;
+   vz_mean[nodeIndex] = vz_mean[nodeIndex] + vz; 
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
index eb301515527a9e8a3056676b0d4dffe8197c7dbe..58856f624fa1dfd2488c3061721e9dac53a67d07 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
@@ -53,8 +53,8 @@ __inline__ __device__ real calcTurbulentViscosityQR(real C, real dxux, real dyuy
         //! Second invariant of the strain-rate tensor
         real Q = c1o2*( dxux*dxux + dyuy*dyuy + dzuz*dzuz ) + c1o4*( Dxy*Dxy + Dxz*Dxz + Dyz*Dyz);
         //! Third invariant of the strain-rate tensor (determinant)
-        real R = - dxux*dyuy*dzuz - c1o4*( Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy );
-        
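+        // Previous expression (superseded by the line below; its dxux*Dyz*Dyz, dyuy*Dxz*Dxz and dzuz*Dxy*Dxy terms had the opposite sign): real R = - dxux*dyuy*dzuz - c1o4*( Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy );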
+        real R = - dxux*dyuy*dzuz + c1o4*( -Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy );
         return C * max(R, c0o1) / Q;
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
index 3719ca3712e6f63a77f62bf314af7d19eea01f4c..7147629c448b8b730e4ae8c4eff8a0a400863de9 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
@@ -38,6 +38,7 @@
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 #include "LBM/LB.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 
@@ -52,34 +53,31 @@ __host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM
     dvz = ((fluidP ? vz[kP] : vz[k])-(fluidM ? vz[kM] : vz[k]))*div;
 }
 
-__global__ void calcAMD(real* vx,
-                        real* vy,
-                        real* vz,
-                        real* turbulentViscosity,
-                        uint* neighborX,
-                        uint* neighborY,
-                        uint* neighborZ,
-                        uint* neighborWSB,
-                        uint* typeOfGridNode,
-                        uint size_Mat,
-                        real SGSConstant)
+__global__ void calcAMD(
+    real* vx,
+    real* vy,
+    real* vz,
+    real* turbulentViscosity,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighborWSB,
+    uint* typeOfGridNode,
+    unsigned long long numberOfLBnodes,
+    real SGSConstant)
 {
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = vf::gpu::getNodeIndex();
 
-    const uint x = threadIdx.x; 
-    const uint y = blockIdx.x; 
-    const uint z = blockIdx.y; 
+    if(nodeIndex >= numberOfLBnodes) return;
+    if(typeOfGridNode[nodeIndex] != GEO_FLUID) return;
 
-    const uint nx = blockDim.x;
-    const uint ny = gridDim.x;
-
-    const uint k = nx*(ny*z + y) + x;
-    if(k >= size_Mat) return;
-    if(typeOfGridNode[k] != GEO_FLUID) return;
-
-    uint kPx = neighborX[k];
-    uint kPy = neighborY[k];
-    uint kPz = neighborZ[k];
-    uint kMxyz = neighborWSB[k];
+    uint kPx = neighborX[nodeIndex];
+    uint kPy = neighborY[nodeIndex];
+    uint kPz = neighborZ[nodeIndex];
+    uint kMxyz = neighborWSB[nodeIndex];
     uint kMx = neighborZ[neighborY[kMxyz]];
     uint kMy = neighborZ[neighborX[kMxyz]];
     uint kMz = neighborY[neighborX[kMxyz]];
@@ -88,9 +86,9 @@ __global__ void calcAMD(real* vx,
          dvydx, dvydy, dvydz,
          dvzdx, dvzdy, dvzdz;
 
-    calcDerivatives(k, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx);
-    calcDerivatives(k, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy);
-    calcDerivatives(k, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz);
+    calcDerivatives(nodeIndex, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx);
+    calcDerivatives(nodeIndex, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy);
+    calcDerivatives(nodeIndex, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz);
 
     real denominator =  dvxdx*dvxdx + dvydx*dvydx + dvzdx*dvzdx + 
                         dvxdy*dvxdy + dvydy*dvydy + dvzdy*dvzdy +
@@ -102,7 +100,7 @@ __global__ void calcAMD(real* vx,
                         (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + 
                         (dvydx*dvzdx + dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy);
 
-    turbulentViscosity[k] = max(c0o1,-SGSConstant*enumerator)/denominator;
+    turbulentViscosity[nodeIndex] = denominator != c0o1 ? max(c0o1,-SGSConstant*enumerator)/denominator : c0o1;
 }
 
 void calcTurbulentViscosityAMD(Parameter* para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
index 05c85e8b546aeaa964b1dbb61cbf01dd9b82ca1a..ccf9d1771ec0e1895e5cb79fae63675429b02c73 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
@@ -1,96 +1,120 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//////////////////////////////////////////////////////////////////////////
-
-/* Device code */
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file VelocityBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
-#include "KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceCompPlusSlip27(
-													real* vx,
-													real* vy,
-													real* vz,
-													real* DD, 
-													int* k_Q, 
-													real* QQ,
-													unsigned int numberOfBCnodes, 
-													real om1, 
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -115,24 +139,24 @@ __global__ void QVelDeviceCompPlusSlip27(
 		   *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 		   *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 		   *q_dirBSE, *q_dirBNW;
-	   q_dirE = &QQ[DIR_P00   * numberOfBCnodes];
-	   q_dirW = &QQ[DIR_M00   * numberOfBCnodes];
-	   q_dirN = &QQ[DIR_0P0   * numberOfBCnodes];
-	   q_dirS = &QQ[DIR_0M0   * numberOfBCnodes];
-	   q_dirT = &QQ[DIR_00P   * numberOfBCnodes];
-	   q_dirB = &QQ[DIR_00M   * numberOfBCnodes];
-	   q_dirNE = &QQ[DIR_PP0  * numberOfBCnodes];
-	   q_dirSW = &QQ[DIR_MM0  * numberOfBCnodes];
-	   q_dirSE = &QQ[DIR_PM0  * numberOfBCnodes];
-	   q_dirNW = &QQ[DIR_MP0  * numberOfBCnodes];
-	   q_dirTE = &QQ[DIR_P0P  * numberOfBCnodes];
-	   q_dirBW = &QQ[DIR_M0M  * numberOfBCnodes];
-	   q_dirBE = &QQ[DIR_P0M  * numberOfBCnodes];
-	   q_dirTW = &QQ[DIR_M0P  * numberOfBCnodes];
-	   q_dirTN = &QQ[DIR_0PP  * numberOfBCnodes];
-	   q_dirBS = &QQ[DIR_0MM  * numberOfBCnodes];
-	   q_dirBN = &QQ[DIR_0PM  * numberOfBCnodes];
-	   q_dirTS = &QQ[DIR_0MP  * numberOfBCnodes];
+	   q_dirE = &QQ[DIR_P00 * numberOfBCnodes];
+	   q_dirW = &QQ[DIR_M00 * numberOfBCnodes];
+	   q_dirN = &QQ[DIR_0P0 * numberOfBCnodes];
+	   q_dirS = &QQ[DIR_0M0 * numberOfBCnodes];
+	   q_dirT = &QQ[DIR_00P * numberOfBCnodes];
+	   q_dirB = &QQ[DIR_00M * numberOfBCnodes];
+	   q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes];
+	   q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes];
+	   q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes];
+	   q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes];
+	   q_dirTE = &QQ[DIR_P0P * numberOfBCnodes];
+	   q_dirBW = &QQ[DIR_M0M * numberOfBCnodes];
+	   q_dirBE = &QQ[DIR_P0M * numberOfBCnodes];
+	   q_dirTW = &QQ[DIR_M0P * numberOfBCnodes];
+	   q_dirTN = &QQ[DIR_0PP * numberOfBCnodes];
+	   q_dirBS = &QQ[DIR_0MM * numberOfBCnodes];
+	   q_dirBN = &QQ[DIR_0PM * numberOfBCnodes];
+	   q_dirTS = &QQ[DIR_0MP * numberOfBCnodes];
 	   q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 	   q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 	   q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -225,63 +249,63 @@ __global__ void QVelDeviceCompPlusSlip27(
 	   //////////////////////////////////////////////////////////////////////////
 	   if (isEvenTimestep == false)
 	   {
-		   D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		   D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		   D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		   D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		   D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		   D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		   D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		   D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		   D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		   D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		   D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		   D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		   D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		   D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		   D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		   D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		   D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		   D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		   D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		   D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		   D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		   D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		   D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		   D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		   D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		   D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	   }
 	   else
 	   {
-		   D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		   D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		   D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		   D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		   D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		   D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		   D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		   D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		   D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		   D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		   D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		   D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		   D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		   D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		   D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		   D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		   D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		   D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		   D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		   D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		   D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		   D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		   D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		   D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		   D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		   D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		   D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		   D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		   D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		   D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		   D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		   D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		   D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		   D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		   D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		   D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		   D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		   D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		   D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		   D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		   D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		   D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		   D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		   D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		   D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		   D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		   D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		   D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		   D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		   D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		   D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		   D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	   }
 	   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	   //Test
@@ -553,18 +577,19 @@ __global__ void QVelDeviceCompPlusSlip27(
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QVeloDeviceEQ27(real* VeloX,
-										   real* VeloY,
-										   real* VeloZ,
-                                           real* DD, 
-                                           int* k_Q, 
-                                           int numberOfBCnodes, 
-                                           real om1, 
-                                           unsigned int* neighborX,
-                                           unsigned int* neighborY,
-                                           unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
-                                           bool isEvenTimestep)
+__global__ void QVeloDeviceEQ27(
+    real* VeloX,
+    real* VeloY,
+    real* VeloZ,
+    real* DD, 
+    int* k_Q, 
+    int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -613,95 +638,95 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // based on BGK Plus Comp
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[ke   ];
-			real mfabb = (D.f[DIR_M00   ])[kw   ];
-			real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-			real mfbab = (D.f[DIR_0M0   ])[ks   ];
-			real mfbbc = (D.f[DIR_00P   ])[kt   ];
-			real mfbba = (D.f[DIR_00M   ])[kb   ];
-			real mfccb = (D.f[DIR_PP0  ])[kne  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-			real mfcab = (D.f[DIR_PM0  ])[kse  ];
-			real mfacb = (D.f[DIR_MP0  ])[knw  ];
-			real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-			real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-			real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-			real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-			real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-			real mfbac = (D.f[DIR_0MP  ])[kts  ];
+			real mfcbb = (D.f[DIR_P00])[ke   ];
+			real mfabb = (D.f[DIR_M00])[kw   ];
+			real mfbcb = (D.f[DIR_0P0])[kn   ];
+			real mfbab = (D.f[DIR_0M0])[ks   ];
+			real mfbbc = (D.f[DIR_00P])[kt   ];
+			real mfbba = (D.f[DIR_00M])[kb   ];
+			real mfccb = (D.f[DIR_PP0])[kne  ];
+			real mfaab = (D.f[DIR_MM0])[ksw  ];
+			real mfcab = (D.f[DIR_PM0])[kse  ];
+			real mfacb = (D.f[DIR_MP0])[knw  ];
+			real mfcbc = (D.f[DIR_P0P])[kte  ];
+			real mfaba = (D.f[DIR_M0M])[kbw  ];
+			real mfcba = (D.f[DIR_P0M])[kbe  ];
+			real mfabc = (D.f[DIR_M0P])[ktw  ];
+			real mfbcc = (D.f[DIR_0PP])[ktn  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs  ];
+			real mfbca = (D.f[DIR_0PM])[kbn  ];
+			real mfbac = (D.f[DIR_0MP])[kts  ];
 			real mfbbb = (D.f[DIR_000])[kzero];
-			real mfccc = (D.f[DIR_PPP ])[ktne ];
-			real mfaac = (D.f[DIR_MMP ])[ktsw ];
-			real mfcac = (D.f[DIR_PMP ])[ktse ];
-			real mfacc = (D.f[DIR_MPP ])[ktnw ];
-			real mfcca = (D.f[DIR_PPM ])[kbne ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-			real mfcaa = (D.f[DIR_PMM ])[kbse ];
-			real mfaca = (D.f[DIR_MPM ])[kbnw ];
+			real mfccc = (D.f[DIR_PPP])[ktne ];
+			real mfaac = (D.f[DIR_MMP])[ktsw ];
+			real mfcac = (D.f[DIR_PMP])[ktse ];
+			real mfacc = (D.f[DIR_MPP])[ktnw ];
+			real mfcca = (D.f[DIR_PPM])[kbne ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw ];
+			real mfcaa = (D.f[DIR_PMM])[kbse ];
+			real mfaca = (D.f[DIR_MPM])[kbnw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real rho   = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 							 mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
@@ -763,33 +788,33 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
 			mfcaa = -rho * XXc * YYa * ZZa - c1o216;
 			mfaca = -rho * XXa * YYc * ZZa - c1o216;
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
-			(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
-			(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
-			(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
-			(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
-			(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
-			(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
-			(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
-			(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
-			(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
-			(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
-			(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
-			(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
-			(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
-			(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
-			(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
-			(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
-			(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+			(D.f[DIR_P00])[ke   ] = mfabb;//mfcbb;
+			(D.f[DIR_M00])[kw   ] = mfcbb;//mfabb;
+			(D.f[DIR_0P0])[kn   ] = mfbab;//mfbcb;
+			(D.f[DIR_0M0])[ks   ] = mfbcb;//mfbab;
+			(D.f[DIR_00P])[kt   ] = mfbba;//mfbbc;
+			(D.f[DIR_00M])[kb   ] = mfbbc;//mfbba;
+			(D.f[DIR_PP0])[kne  ] = mfaab;//mfccb;
+			(D.f[DIR_MM0])[ksw  ] = mfccb;//mfaab;
+			(D.f[DIR_PM0])[kse  ] = mfacb;//mfcab;
+			(D.f[DIR_MP0])[knw  ] = mfcab;//mfacb;
+			(D.f[DIR_P0P])[kte  ] = mfaba;//mfcbc;
+			(D.f[DIR_M0M])[kbw  ] = mfcbc;//mfaba;
+			(D.f[DIR_P0M])[kbe  ] = mfabc;//mfcba;
+			(D.f[DIR_M0P])[ktw  ] = mfcba;//mfabc;
+			(D.f[DIR_0PP])[ktn  ] = mfbaa;//mfbcc;
+			(D.f[DIR_0MM])[kbs  ] = mfbcc;//mfbaa;
+			(D.f[DIR_0PM])[kbn  ] = mfbac;//mfbca;
+			(D.f[DIR_0MP])[kts  ] = mfbca;//mfbac;
 			(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
-			(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
-			(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
-			(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
-			(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
-			(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
-			(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
-			(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
-			(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
+			(D.f[DIR_PPP])[ktne ] = mfaaa;//mfccc;
+			(D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac;
+			(D.f[DIR_PMP])[ktse ] = mfaca;//mfcac;
+			(D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc;
+			(D.f[DIR_PPM])[kbne ] = mfaac;//mfcca;
+			(D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa;
+			(D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa;
+			(D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -834,18 +859,18 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QVeloStreetDeviceEQ27(
-	real* veloXfraction,
-	real* veloYfraction,
-	int*  naschVelo,
-	real* DD,
-	int*  naschIndex,
-	int   numberOfStreetNodes,
-	real  velocityRatio,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint  size_Mat,
-	bool  isEvenTimestep)
+    real* veloXfraction,
+    real* veloYfraction,
+    int*  naschVelo,
+    real* DD,
+    int*  naschIndex,
+    int   numberOfStreetNodes,
+    real  velocityRatio,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool  isEvenTimestep)
 {
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -894,95 +919,95 @@ __global__ void QVeloStreetDeviceEQ27(
 		Distributions27 D;
 		if (isEvenTimestep == true)
 		{
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 		}
 		else
 		{
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 		}
 
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		// based on BGK Plus Comp
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		real mfcbb = (D.f[DIR_P00   ])[ke   ];
-		real mfabb = (D.f[DIR_M00   ])[kw   ];
-		real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-		real mfbab = (D.f[DIR_0M0   ])[ks   ];
-		real mfbbc = (D.f[DIR_00P   ])[kt   ];
-		real mfbba = (D.f[DIR_00M   ])[kb   ];
-		real mfccb = (D.f[DIR_PP0  ])[kne  ];
-		real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-		real mfcab = (D.f[DIR_PM0  ])[kse  ];
-		real mfacb = (D.f[DIR_MP0  ])[knw  ];
-		real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-		real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-		real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-		real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-		real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-		real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-		real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-		real mfbac = (D.f[DIR_0MP  ])[kts  ];
+		real mfcbb = (D.f[DIR_P00])[ke   ];
+		real mfabb = (D.f[DIR_M00])[kw   ];
+		real mfbcb = (D.f[DIR_0P0])[kn   ];
+		real mfbab = (D.f[DIR_0M0])[ks   ];
+		real mfbbc = (D.f[DIR_00P])[kt   ];
+		real mfbba = (D.f[DIR_00M])[kb   ];
+		real mfccb = (D.f[DIR_PP0])[kne  ];
+		real mfaab = (D.f[DIR_MM0])[ksw  ];
+		real mfcab = (D.f[DIR_PM0])[kse  ];
+		real mfacb = (D.f[DIR_MP0])[knw  ];
+		real mfcbc = (D.f[DIR_P0P])[kte  ];
+		real mfaba = (D.f[DIR_M0M])[kbw  ];
+		real mfcba = (D.f[DIR_P0M])[kbe  ];
+		real mfabc = (D.f[DIR_M0P])[ktw  ];
+		real mfbcc = (D.f[DIR_0PP])[ktn  ];
+		real mfbaa = (D.f[DIR_0MM])[kbs  ];
+		real mfbca = (D.f[DIR_0PM])[kbn  ];
+		real mfbac = (D.f[DIR_0MP])[kts  ];
 		real mfbbb = (D.f[DIR_000])[kzero];
-		real mfccc = (D.f[DIR_PPP ])[ktne ];
-		real mfaac = (D.f[DIR_MMP ])[ktsw ];
-		real mfcac = (D.f[DIR_PMP ])[ktse ];
-		real mfacc = (D.f[DIR_MPP ])[ktnw ];
-		real mfcca = (D.f[DIR_PPM ])[kbne ];
-		real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-		real mfcaa = (D.f[DIR_PMM ])[kbse ];
-		real mfaca = (D.f[DIR_MPM ])[kbnw ];
+		real mfccc = (D.f[DIR_PPP])[ktne ];
+		real mfaac = (D.f[DIR_MMP])[ktsw ];
+		real mfcac = (D.f[DIR_PMP])[ktse ];
+		real mfacc = (D.f[DIR_MPP])[ktnw ];
+		real mfcca = (D.f[DIR_PPM])[kbne ];
+		real mfaaa = (D.f[DIR_MMM])[kbsw ];
+		real mfcaa = (D.f[DIR_PMM])[kbse ];
+		real mfaca = (D.f[DIR_MPM])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////////
 		real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca +
 			        mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb +
@@ -1049,33 +1074,33 @@ __global__ void QVeloStreetDeviceEQ27(
 		mfcaa = -rho * XXc * YYa * ZZa - c1o216;
 		mfaca = -rho * XXa * YYc * ZZa - c1o216;
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
-		(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
-		(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
-		(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
-		(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
-		(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
-		(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
-		(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
-		(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
-		(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
-		(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
-		(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
-		(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
-		(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
-		(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
-		(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
-		(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
-		(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+		(D.f[DIR_P00])[ke   ] = mfabb;//mfcbb;
+		(D.f[DIR_M00])[kw   ] = mfcbb;//mfabb;
+		(D.f[DIR_0P0])[kn   ] = mfbab;//mfbcb;
+		(D.f[DIR_0M0])[ks   ] = mfbcb;//mfbab;
+		(D.f[DIR_00P])[kt   ] = mfbba;//mfbbc;
+		(D.f[DIR_00M])[kb   ] = mfbbc;//mfbba;
+		(D.f[DIR_PP0])[kne  ] = mfaab;//mfccb;
+		(D.f[DIR_MM0])[ksw  ] = mfccb;//mfaab;
+		(D.f[DIR_PM0])[kse  ] = mfacb;//mfcab;
+		(D.f[DIR_MP0])[knw  ] = mfcab;//mfacb;
+		(D.f[DIR_P0P])[kte  ] = mfaba;//mfcbc;
+		(D.f[DIR_M0M])[kbw  ] = mfcbc;//mfaba;
+		(D.f[DIR_P0M])[kbe  ] = mfabc;//mfcba;
+		(D.f[DIR_M0P])[ktw  ] = mfcba;//mfabc;
+		(D.f[DIR_0PP])[ktn  ] = mfbaa;//mfbcc;
+		(D.f[DIR_0MM])[kbs  ] = mfbcc;//mfbaa;
+		(D.f[DIR_0PM])[kbn  ] = mfbac;//mfbca;
+		(D.f[DIR_0MP])[kts  ] = mfbca;//mfbac;
 		(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
-		(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
-		(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
-		(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
-		(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
-		(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
-		(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
-		(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
-		(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
+		(D.f[DIR_PPP])[ktne ] = mfaaa;//mfccc;
+		(D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac;
+		(D.f[DIR_PMP])[ktse ] = mfaca;//mfcac;
+		(D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc;
+		(D.f[DIR_PPM])[kbne ] = mfaac;//mfcca;
+		(D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa;
+		(D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa;
+		(D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca;
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1120,80 +1145,80 @@ __global__ void QVeloStreetDeviceEQ27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceIncompHighNu27(
-													real* vx,
-													real* vy,
-													real* vz,
-													real* DD, 
-													int* k_Q, 
-													real* QQ,
-													unsigned int numberOfBCnodes, 
-													real om1, 
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1218,24 +1243,24 @@ __global__ void QVelDeviceIncompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1278,32 +1303,32 @@ __global__ void QVelDeviceIncompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -1328,63 +1353,63 @@ __global__ void QVelDeviceIncompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -1618,80 +1643,80 @@ __global__ void QVelDeviceIncompHighNu27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceCompHighNu27(
-													real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes, 
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1716,24 +1741,24 @@ __global__ void QVelDeviceCompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1776,58 +1801,58 @@ __global__ void QVelDeviceCompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
-      //f_W    = (D.f[DIR_P00   ])[ke   ];
-      //f_E    = (D.f[DIR_M00   ])[kw   ];
-      //f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //f_B    = (D.f[DIR_00P   ])[kt   ];
-      //f_T    = (D.f[DIR_00M   ])[kb   ];
-      //f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
+      //f_W    = (D.f[DIR_P00])[ke   ];
+      //f_E    = (D.f[DIR_M00])[kw   ];
+      //f_S    = (D.f[DIR_0P0])[kn   ];
+      //f_N    = (D.f[DIR_0M0])[ks   ];
+      //f_B    = (D.f[DIR_00P])[kt   ];
+      //f_T    = (D.f[DIR_00M])[kb   ];
+      //f_SW   = (D.f[DIR_PP0])[kne  ];
+      //f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //f_NW   = (D.f[DIR_PM0])[kse  ];
+      //f_SE   = (D.f[DIR_MP0])[knw  ];
+      //f_BW   = (D.f[DIR_P0P])[kte  ];
+      //f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //f_BN   = (D.f[DIR_0MP])[kts  ];
+      //f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -1852,63 +1877,63 @@ __global__ void QVelDeviceCompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -2194,39 +2219,32 @@ __global__ void QVelDeviceCompHighNu27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceCompZeroPress27(
-														real* velocityX,
-														real* velocityY,
-														real* velocityZ,
-														real* distribution, 
-														int* subgridDistanceIndices, 
-														real* subgridDistances,
-														unsigned int numberOfBCnodes, 
-														real omega, 
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int numberOfLBnodes, 
-														bool isEvenTimestep)
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distribution, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
-	//! The velocity boundary condition is executed in the following steps
-	//!
-	////////////////////////////////////////////////////////////////////////////////
-	//! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-	//!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   //! The velocity boundary condition is executed in the following steps
+   //!
+   ////////////////////////////////////////////////////////////////////////////////
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
 
       //////////////////////////////////////////////////////////////////////////
@@ -2239,9 +2257,9 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local velocities
       //!
-      real VeloX = velocityX[k];
-      real VeloY = velocityY[k];
-      real VeloZ = velocityZ[k];
+      real VeloX = velocityX[nodeIndex];
+      real VeloY = velocityY[nodeIndex];
+      real VeloZ = velocityZ[nodeIndex];
 
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -2253,7 +2271,7 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int KQK  = subgridDistanceIndices[k];
+      unsigned int KQK  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= KQK;
       unsigned int ke   = KQK;
       unsigned int kw   = neighborX[KQK];
@@ -2285,32 +2303,32 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -2342,7 +2360,7 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Update distributions with subgrid distance (q) between zero and one
       real feq, q, velocityLB, velocityBC;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -2351,7 +2369,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -2360,7 +2378,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -2369,7 +2387,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -2378,7 +2396,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -2387,7 +2405,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -2396,7 +2414,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -2405,7 +2423,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -2414,7 +2432,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -2423,7 +2441,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -2432,7 +2450,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -2441,7 +2459,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -2450,7 +2468,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -2459,7 +2477,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -2468,7 +2486,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -2477,7 +2495,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -2486,7 +2504,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -2495,7 +2513,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -2504,7 +2522,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -2513,7 +2531,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -2522,7 +2540,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -2531,7 +2549,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -2540,7 +2558,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -2549,7 +2567,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -2558,7 +2576,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -2567,7 +2585,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -2619,87 +2637,88 @@ __global__ void QVelDeviceCompZeroPress27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDeviceCompZeroPress1h27( int inx,
-														int iny,
-														real* vx,
-														real* vy,
-														real* vz,
-														real* DD, 
-														int* k_Q, 
-														real* QQ,
-														unsigned int numberOfBCnodes,
-														real om1, 
-														real Phi,
-														real angularVelocity,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* coordX,
-														real* coordY,
-														real* coordZ,
-														unsigned int size_Mat, 
-														bool isEvenTimestep)
+__global__ void QVelDeviceCompZeroPress1h27(
+    int inx,
+    int iny,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1, 
+    real Phi,
+    real angularVelocity,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2738,24 +2757,24 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2797,63 +2816,63 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real vx1, vx2, vx3, drho, feq, q, cu_sq;
@@ -3090,21 +3109,22 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void LB_BC_Vel_West_27( int nx, 
-                                              int ny, 
-                                              int nz, 
-                                              int itz, 
-                                              unsigned int* bcMatD, 
-                                              unsigned int* neighborX,
-                                              unsigned int* neighborY,
-                                              unsigned int* neighborZ,
-                                              real* DD, 
-                                              unsigned int size_Mat, 
-                                              bool isEvenTimestep, 
-                                              real u0x, 
-                                              unsigned int grid_nx, 
-                                              unsigned int grid_ny, 
-                                              real om) 
+__global__ void LB_BC_Vel_West_27(
+    int nx, 
+    int ny, 
+    int nz, 
+    int itz, 
+    unsigned int* bcMatD, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD, 
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep, 
+    real u0x, 
+    unsigned int grid_nx, 
+    unsigned int grid_ny, 
+    real om) 
 {
    //thread-index
    unsigned int ity = blockIdx.x;
@@ -3125,63 +3145,63 @@ __global__ void LB_BC_Vel_West_27( int nx,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -3300,33 +3320,33 @@ __global__ void LB_BC_Vel_West_27( int nx,
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
          f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
          f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
@@ -3343,32 +3363,32 @@ __global__ void LB_BC_Vel_West_27( int nx,
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
-      (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-      (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-      (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-      (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-      (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-      (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-      (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-      (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-      (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-      (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-      (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-      (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-      (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-      (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-      (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-      (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-      (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-      (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-      (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-      (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-      (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-      (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-      (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-      (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-      (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-      (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+      (D.f[DIR_P00])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+      (D.f[DIR_M00])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+      (D.f[DIR_0P0])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+      (D.f[DIR_0M0])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+      (D.f[DIR_00P])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+      (D.f[DIR_00M])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+      (D.f[DIR_PP0])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+      (D.f[DIR_MM0])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+      (D.f[DIR_PM0])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+      (D.f[DIR_MP0])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+      (D.f[DIR_P0P])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+      (D.f[DIR_M0M])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+      (D.f[DIR_P0M])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+      (D.f[DIR_M0P])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+      (D.f[DIR_0PP])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+      (D.f[DIR_0MM])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+      (D.f[DIR_0PM])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+      (D.f[DIR_0MP])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+      (D.f[DIR_PPP])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+      (D.f[DIR_MMM])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+      (D.f[DIR_PPM])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+      (D.f[DIR_MMP])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+      (D.f[DIR_PMP])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+      (D.f[DIR_MPM])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+      (D.f[DIR_PMM])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+      (D.f[DIR_MPP])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
    }
    __syncthreads();
 }          
@@ -3414,18 +3434,18 @@ __global__ void LB_BC_Vel_West_27( int nx,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDevPlainBB27(
-   real* velocityX,
-   real* velocityY,
-   real* velocityZ,
-   real* distributions,
-   int* subgridDistanceIndices,
-   real* subgridDistances,
-   uint numberOfBCnodes,
-   uint* neighborX,
-   uint* neighborY,
-   uint* neighborZ,
-   uint numberOfLBnodes,
-   bool isEvenTimestep)
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distributions,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    uint numberOfBCnodes,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The velocity boundary condition is executed in the following steps
@@ -3433,18 +3453,11 @@ __global__ void QVelDevPlainBB27(
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;   // global x-index
-   const unsigned  y = blockIdx.x;    // global y-index
-   const unsigned  z = blockIdx.y;    // global z-index
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    // run for all indices in size of boundary condition (numberOfBCnodes)
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
        //////////////////////////////////////////////////////////////////////////
        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -3456,9 +3469,9 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local velocities
       //!
-      real VeloX = velocityX[k];
-      real VeloY = velocityY[k];
-      real VeloZ = velocityZ[k];
+      real VeloX = velocityX[nodeIndex];
+      real VeloY = velocityY[nodeIndex];
+      real VeloZ = velocityZ[nodeIndex];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local subgrid distances (q's)
@@ -3469,7 +3482,7 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      uint indexOfBCnode = subgridDistanceIndices[k];
+      uint indexOfBCnode = subgridDistanceIndices[nodeIndex];
       uint ke   = indexOfBCnode;
       uint kw   = neighborX[indexOfBCnode];
       uint kn   = indexOfBCnode;
@@ -3500,32 +3513,32 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
@@ -3535,32 +3548,32 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - rewrite distributions if there is a sub-grid distance (q) in same direction
       real q;
-      q = (subgridD.q[DIR_P00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00  ])[kw  ]=f_E   + c4o9  * (-VeloX);
-      q = (subgridD.q[DIR_M00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00  ])[ke  ]=f_W   + c4o9  * ( VeloX);
-      q = (subgridD.q[DIR_0P0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0  ])[ks  ]=f_N   + c4o9  * (-VeloY);
-      q = (subgridD.q[DIR_0M0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0  ])[kn  ]=f_S   + c4o9  * ( VeloY);
-      q = (subgridD.q[DIR_00P  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M  ])[kb  ]=f_T   + c4o9  * (-VeloZ);
-      q = (subgridD.q[DIR_00M  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P  ])[kt  ]=f_B   + c4o9  * ( VeloZ);
-      q = (subgridD.q[DIR_PP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0 ])[ksw ]=f_NE  + c1o9  * (-VeloX - VeloY);
-      q = (subgridD.q[DIR_MM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0 ])[kne ]=f_SW  + c1o9  * ( VeloX + VeloY);
-      q = (subgridD.q[DIR_PM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0 ])[knw ]=f_SE  + c1o9  * (-VeloX + VeloY);
-      q = (subgridD.q[DIR_MP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0 ])[kse ]=f_NW  + c1o9  * ( VeloX - VeloY);
-      q = (subgridD.q[DIR_P0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M ])[kbw ]=f_TE  + c1o9  * (-VeloX - VeloZ);
-      q = (subgridD.q[DIR_M0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P ])[kte ]=f_BW  + c1o9  * ( VeloX + VeloZ);
-      q = (subgridD.q[DIR_P0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P ])[ktw ]=f_BE  + c1o9  * (-VeloX + VeloZ);
-      q = (subgridD.q[DIR_M0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M ])[kbe ]=f_TW  + c1o9  * ( VeloX - VeloZ);
-      q = (subgridD.q[DIR_0PP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM ])[kbs ]=f_TN  + c1o9  * (-VeloY - VeloZ);
-      q = (subgridD.q[DIR_0MM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP ])[ktn ]=f_BS  + c1o9  * ( VeloY + VeloZ);
-      q = (subgridD.q[DIR_0PM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP ])[kts ]=f_BN  + c1o9  * (-VeloY + VeloZ);
-      q = (subgridD.q[DIR_0MP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM ])[kbn ]=f_TS  + c1o9  * ( VeloY - VeloZ);
-      q = (subgridD.q[DIR_PPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ);
-      q = (subgridD.q[DIR_MMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ);
-      q = (subgridD.q[DIR_PPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ);
-      q = (subgridD.q[DIR_MMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ);
-      q = (subgridD.q[DIR_PMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ);
-      q = (subgridD.q[DIR_MPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ);
-      q = (subgridD.q[DIR_PMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ);
-      q = (subgridD.q[DIR_MPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ);
+      q = (subgridD.q[DIR_P00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00])[kw  ]=f_E   + c4o9  * (-VeloX);
+      q = (subgridD.q[DIR_M00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00])[ke  ]=f_W   + c4o9  * ( VeloX);
+      q = (subgridD.q[DIR_0P0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0])[ks  ]=f_N   + c4o9  * (-VeloY);
+      q = (subgridD.q[DIR_0M0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0])[kn  ]=f_S   + c4o9  * ( VeloY);
+      q = (subgridD.q[DIR_00P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M])[kb  ]=f_T   + c4o9  * (-VeloZ);
+      q = (subgridD.q[DIR_00M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P])[kt  ]=f_B   + c4o9  * ( VeloZ);
+      q = (subgridD.q[DIR_PP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0])[ksw ]=f_NE  + c1o9  * (-VeloX - VeloY);
+      q = (subgridD.q[DIR_MM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0])[kne ]=f_SW  + c1o9  * ( VeloX + VeloY);
+      q = (subgridD.q[DIR_PM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0])[knw ]=f_SE  + c1o9  * (-VeloX + VeloY);
+      q = (subgridD.q[DIR_MP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0])[kse ]=f_NW  + c1o9  * ( VeloX - VeloY);
+      q = (subgridD.q[DIR_P0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M])[kbw ]=f_TE  + c1o9  * (-VeloX - VeloZ);
+      q = (subgridD.q[DIR_M0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P])[kte ]=f_BW  + c1o9  * ( VeloX + VeloZ);
+      q = (subgridD.q[DIR_P0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P])[ktw ]=f_BE  + c1o9  * (-VeloX + VeloZ);
+      q = (subgridD.q[DIR_M0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M])[kbe ]=f_TW  + c1o9  * ( VeloX - VeloZ);
+      q = (subgridD.q[DIR_0PP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM])[kbs ]=f_TN  + c1o9  * (-VeloY - VeloZ);
+      q = (subgridD.q[DIR_0MM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP])[ktn ]=f_BS  + c1o9  * ( VeloY + VeloZ);
+      q = (subgridD.q[DIR_0PM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP])[kts ]=f_BN  + c1o9  * (-VeloY + VeloZ);
+      q = (subgridD.q[DIR_0MP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM])[kbn ]=f_TS  + c1o9  * ( VeloY - VeloZ);
+      q = (subgridD.q[DIR_PPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ);
+      q = (subgridD.q[DIR_MMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ);
+      q = (subgridD.q[DIR_PPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ);
+      q = (subgridD.q[DIR_MMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ);
+      q = (subgridD.q[DIR_PMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ);
+      q = (subgridD.q[DIR_MPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ);
+      q = (subgridD.q[DIR_PMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ);
+      q = (subgridD.q[DIR_MPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3604,80 +3617,81 @@ __global__ void QVelDevPlainBB27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDevCouette27(real* vx,
-											real* vy,
-	 										real* vz,
-											real* DD,
-											int* k_Q, 
-											real* QQ,
-											unsigned int numberOfBCnodes, 
-											real om1, 
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat, 
-											bool isEvenTimestep)
+__global__ void QVelDevCouette27(
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3702,24 +3716,24 @@ __global__ void QVelDevCouette27(real* vx,
 			 *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			 *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			 *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3761,94 +3775,94 @@ __global__ void QVelDevCouette27(real* vx,
       ////////////////////////////////////////////////////////////////////////////////
      
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  ///////               FlowDirection Y !!!!!!!!!!                                                           ///////////////////////////////////
@@ -3868,24 +3882,24 @@ __global__ void QVelDevCouette27(real* vx,
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set distributions
       real q;
-      q = q_dirE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M00  ])[kw  ]=f_E   + ms*c2o27  * VeloX;	
-      q = q_dirW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P00  ])[ke  ]=f_W   - ms*c2o27  * VeloX;	
-      q = q_dirN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0M0  ])[ks  ]=f_N   + ms*c2o27  * VeloY;	
-      q = q_dirS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0P0  ])[kn  ]=f_S   - ms*c2o27  * VeloY;	
-	  q = q_dirT[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00M  ])[kb  ]=f_T   + ms*c2o27  * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      q = q_dirB[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00P  ])[kt  ]=f_B   - ms*c2o27  * VeloZ;
-      q = q_dirNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MM0 ])[ksw ]=f_NE  + ms*c1o54  * VeloX + ms*c1o54  * VeloY;
-	  q = q_dirSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PP0 ])[kne ]=f_SW  - ms*c1o54  * VeloX - ms*c1o54  * VeloY;
-	  q = q_dirSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MP0 ])[knw ]=f_SE  + ms*c1o54  * VeloX - ms*c1o54  * VeloY;
-	  q = q_dirNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PM0 ])[kse ]=f_NW  - ms*c1o54  * VeloX + ms*c1o54  * VeloY;
-	  q = q_dirTE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0M ])[kbw ]=f_TE  + ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirBW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0P ])[kte ]=f_BW  - ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
-	  q = q_dirBE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0P ])[ktw ]=f_BE  + ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
-	  q = q_dirTW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0M ])[kbe ]=f_TW  - ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirTN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MM ])[kbs ]=f_TN  + ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirBS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PP ])[ktn ]=f_BS  - ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
-	  q = q_dirBN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MP ])[kts ]=f_BN  + ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
-	  q = q_dirTS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PM ])[kbn ]=f_TS  - ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
+      q = q_dirE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M00])[kw  ]=f_E   + ms*c2o27  * VeloX;	
+      q = q_dirW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P00])[ke  ]=f_W   - ms*c2o27  * VeloX;	
+      q = q_dirN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0M0])[ks  ]=f_N   + ms*c2o27  * VeloY;	
+      q = q_dirS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0P0])[kn  ]=f_S   - ms*c2o27  * VeloY;	
+	  q = q_dirT[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00M])[kb  ]=f_T   + ms*c2o27  * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
+      q = q_dirB[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00P])[kt  ]=f_B   - ms*c2o27  * VeloZ;
+      q = q_dirNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MM0])[ksw ]=f_NE  + ms*c1o54  * VeloX + ms*c1o54  * VeloY;
+	  q = q_dirSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PP0])[kne ]=f_SW  - ms*c1o54  * VeloX - ms*c1o54  * VeloY;
+	  q = q_dirSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MP0])[knw ]=f_SE  + ms*c1o54  * VeloX - ms*c1o54  * VeloY;
+	  q = q_dirNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PM0])[kse ]=f_NW  - ms*c1o54  * VeloX + ms*c1o54  * VeloY;
+	  q = q_dirTE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0M])[kbw ]=f_TE  + ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirBW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0P])[kte ]=f_BW  - ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
+	  q = q_dirBE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0P])[ktw ]=f_BE  + ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
+	  q = q_dirTW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0M])[kbe ]=f_TW  - ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirTN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MM])[kbs ]=f_TN  + ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirBS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PP])[ktn ]=f_BS  - ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
+	  q = q_dirBN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MP])[kts ]=f_BN  + ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
+	  q = q_dirTS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PM])[kbn ]=f_TS  - ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
       q = q_dirTNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
       q = q_dirBSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PPP])[ktne]=f_BSW - ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
       q = q_dirBNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
@@ -3894,24 +3908,24 @@ __global__ void QVelDevCouette27(real* vx,
       q = q_dirBNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PMP])[ktse]=f_BNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
       q = q_dirBSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MPP])[ktnw]=f_BSE + ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
       q = q_dirTNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PMM])[kbse]=f_TNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      //q = q_dirE[k];	if (q>=zero && q<=one)	(D.f[DIR_M00  ])[kw  ]=f_E   + ms*c2over27  * VeloX;	
-   //   q = q_dirW[k];	if (q>=zero && q<=one)	(D.f[DIR_P00  ])[ke  ]=f_W   - ms*c2over27  * VeloX;	
-   //   q = q_dirN[k];	if (q>=zero && q<=one)	(D.f[DIR_0M0  ])[ks  ]=f_N   + ms*c2over27  * VeloY;	
-   //   q = q_dirS[k];	if (q>=zero && q<=one)	(D.f[DIR_0P0  ])[kn  ]=f_S   - ms*c2over27  * VeloY;	
-	  //q = q_dirT[k];	if (q>=zero && q<=one)	(D.f[DIR_00M  ])[kb  ]=f_T   + ms*c2over27  * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-   //   q = q_dirB[k];	if (q>=zero && q<=one)	(D.f[DIR_00P  ])[kt  ]=f_B   - ms*c2over27  * VeloZ;
-   //   q = q_dirNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MM0 ])[ksw ]=f_NE  + ms*c1over54  * VeloX + ms*c1over54  * VeloY;
-	  //q = q_dirSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PP0 ])[kne ]=f_SW  - ms*c1over54  * VeloX - ms*c1over54  * VeloY;
-	  //q = q_dirSE[k];	if (q>=zero && q<=one)	(D.f[DIR_MP0 ])[knw ]=f_SE  + ms*c1over54  * VeloX - ms*c1over54  * VeloY;
-	  //q = q_dirNW[k];	if (q>=zero && q<=one)	(D.f[DIR_PM0 ])[kse ]=f_NW  - ms*c1over54  * VeloX + ms*c1over54  * VeloY;
-	  //q = q_dirTE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0M ])[kbw ]=f_TE  + ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirBW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0P ])[kte ]=f_BW  - ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
-	  //q = q_dirBE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0P ])[ktw ]=f_BE  + ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
-	  //q = q_dirTW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0M ])[kbe ]=f_TW  - ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirTN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MM ])[kbs ]=f_TN  + ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirBS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PP ])[ktn ]=f_BS  - ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
-	  //q = q_dirBN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MP ])[kts ]=f_BN  + ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
-	  //q = q_dirTS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PM ])[kbn ]=f_TS  - ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+      //q = q_dirE[k];	if (q>=zero && q<=one)	(D.f[DIR_M00])[kw  ]=f_E   + ms*c2over27  * VeloX;	
+   //   q = q_dirW[k];	if (q>=zero && q<=one)	(D.f[DIR_P00])[ke  ]=f_W   - ms*c2over27  * VeloX;	
+   //   q = q_dirN[k];	if (q>=zero && q<=one)	(D.f[DIR_0M0])[ks  ]=f_N   + ms*c2over27  * VeloY;	
+   //   q = q_dirS[k];	if (q>=zero && q<=one)	(D.f[DIR_0P0])[kn  ]=f_S   - ms*c2over27  * VeloY;	
+	  //q = q_dirT[k];	if (q>=zero && q<=one)	(D.f[DIR_00M])[kb  ]=f_T   + ms*c2over27  * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+   //   q = q_dirB[k];	if (q>=zero && q<=one)	(D.f[DIR_00P])[kt  ]=f_B   - ms*c2over27  * VeloZ;
+   //   q = q_dirNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MM0])[ksw ]=f_NE  + ms*c1over54  * VeloX + ms*c1over54  * VeloY;
+	  //q = q_dirSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PP0])[kne ]=f_SW  - ms*c1over54  * VeloX - ms*c1over54  * VeloY;
+	  //q = q_dirSE[k];	if (q>=zero && q<=one)	(D.f[DIR_MP0])[knw ]=f_SE  + ms*c1over54  * VeloX - ms*c1over54  * VeloY;
+	  //q = q_dirNW[k];	if (q>=zero && q<=one)	(D.f[DIR_PM0])[kse ]=f_NW  - ms*c1over54  * VeloX + ms*c1over54  * VeloY;
+	  //q = q_dirTE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0M])[kbw ]=f_TE  + ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirBW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0P])[kte ]=f_BW  - ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
+	  //q = q_dirBE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0P])[ktw ]=f_BE  + ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
+	  //q = q_dirTW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0M])[kbe ]=f_TW  - ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirTN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MM])[kbs ]=f_TN  + ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirBS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PP])[ktn ]=f_BS  - ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
+	  //q = q_dirBN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MP])[kts ]=f_BN  + ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
+	  //q = q_dirTS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PM])[kbn ]=f_TS  - ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
    //   q = q_dirTNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
    //   q = q_dirBSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PPP])[ktne]=f_BSW - ms*c1over216 * VeloX - ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
    //   q = q_dirBNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
@@ -3964,87 +3978,88 @@ __global__ void QVelDevCouette27(real* vx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDev1h27( int inx,
-										int iny,
-										real* vx,
-										real* vy,
-										real* vz,
-										real* DD, 
-										int* k_Q, 
-										real* QQ,
-										unsigned int numberOfBCnodes, 
-										real om1,
-										real Phi,
-										real angularVelocity,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										real* coordX,
-										real* coordY,
-										real* coordZ,
-										unsigned int size_Mat, 
-										bool isEvenTimestep)
+__global__ void QVelDev1h27(
+    int inx,
+    int iny,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1,
+    real Phi,
+    real angularVelocity,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4079,24 +4094,24 @@ __global__ void QVelDev1h27( int inx,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -4167,32 +4182,32 @@ __global__ void QVelDev1h27( int inx,
 		//real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
 		//	f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		//f_W    = (D.f[DIR_P00   ])[ke   ];
-		//f_E    = (D.f[DIR_M00   ])[kw   ];
-		//f_S    = (D.f[DIR_0P0   ])[kn   ];
-		//f_N    = (D.f[DIR_0M0   ])[ks   ];
-		//f_B    = (D.f[DIR_00P   ])[kt   ];
-		//f_T    = (D.f[DIR_00M   ])[kb   ];
-		//f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		//f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		//f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		//f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		//f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		//f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		//f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		//f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		//f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		//f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		//f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		//f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		//f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		//f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		//f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		//f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-		//f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		//f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		//f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		//f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+		//f_W    = (D.f[DIR_P00])[ke   ];
+		//f_E    = (D.f[DIR_M00])[kw   ];
+		//f_S    = (D.f[DIR_0P0])[kn   ];
+		//f_N    = (D.f[DIR_0M0])[ks   ];
+		//f_B    = (D.f[DIR_00P])[kt   ];
+		//f_T    = (D.f[DIR_00M])[kb   ];
+		//f_SW   = (D.f[DIR_PP0])[kne  ];
+		//f_NE   = (D.f[DIR_MM0])[ksw  ];
+		//f_NW   = (D.f[DIR_PM0])[kse  ];
+		//f_SE   = (D.f[DIR_MP0])[knw  ];
+		//f_BW   = (D.f[DIR_P0P])[kte  ];
+		//f_TE   = (D.f[DIR_M0M])[kbw  ];
+		//f_TW   = (D.f[DIR_P0M])[kbe  ];
+		//f_BE   = (D.f[DIR_M0P])[ktw  ];
+		//f_BS   = (D.f[DIR_0PP])[ktn  ];
+		//f_TN   = (D.f[DIR_0MM])[kbs  ];
+		//f_TS   = (D.f[DIR_0PM])[kbn  ];
+		//f_BN   = (D.f[DIR_0MP])[kts  ];
+		//f_BSW  = (D.f[DIR_PPP])[ktne ];
+		//f_BNE  = (D.f[DIR_MMP])[ktsw ];
+		//f_BNW  = (D.f[DIR_PMP])[ktse ];
+		//f_BSE  = (D.f[DIR_MPP])[ktnw ];
+		//f_TSW  = (D.f[DIR_PPM])[kbne ];
+		//f_TNE  = (D.f[DIR_MMM])[kbsw ];
+		//f_TNW  = (D.f[DIR_PMM])[kbse ];
+		//f_TSE  = (D.f[DIR_MPM])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////
 		real /*vx1, vx2,*/ vx3, drho, feq, q, cu_sq;
 		//drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -4217,63 +4232,63 @@ __global__ void QVelDev1h27( int inx,
 		//////////////////////////////////////////////////////////////////////////
 		if (isEvenTimestep==false)
 		{
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 		} 
 		else
 		{
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 		}
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//Test
@@ -4748,39 +4763,32 @@ __global__ void QVelDev1h27( int inx,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceComp27(
-											real* velocityX,
-											real* velocityY,
-											real* velocityZ,
-											real* distributions,
-											int* subgridDistanceIndices,
-											real* subgridDistances,
-											unsigned int numberOfBCnodes,
-											real omega,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int numberOfLBnodes,
-											bool isEvenTimestep)
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distributions,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The velocity boundary condition is executed in the following steps
    //!
-   ////////////////////////////////////////////////////////////////////////////////
-   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-   //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -4792,9 +4800,9 @@ __global__ void QVelDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local velocities
       //!
-      real VeloX = velocityX[k];
-      real VeloY = velocityY[k];
-      real VeloZ = velocityZ[k];
+      real VeloX = velocityX[nodeIndex];
+      real VeloY = velocityY[nodeIndex];
+      real VeloZ = velocityZ[nodeIndex];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local subgrid distances (q's)
@@ -4805,7 +4813,7 @@ __global__ void QVelDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -4837,32 +4845,32 @@ __global__ void QVelDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -4894,7 +4902,7 @@ __global__ void QVelDeviceComp27(
       //! - Update distributions with subgrid distance (q) between zero and one
       //!
       real feq, q, velocityLB, velocityBC;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -4903,7 +4911,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -4912,7 +4920,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -4921,7 +4929,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -4930,7 +4938,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -4939,7 +4947,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -4948,7 +4956,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -4957,7 +4965,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -4966,7 +4974,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -4975,7 +4983,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -4984,7 +4992,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -4993,7 +5001,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -5002,7 +5010,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -5011,7 +5019,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -5020,7 +5028,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -5029,7 +5037,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -5038,7 +5046,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -5047,7 +5055,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -5056,7 +5064,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -5065,7 +5073,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -5074,7 +5082,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -5083,7 +5091,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -5092,7 +5100,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -5101,7 +5109,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -5110,7 +5118,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -5119,7 +5127,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -5170,82 +5178,83 @@ __global__ void QVelDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDevice27(int inx,
-                                        int iny,
-                                        real* vx,
-                                        real* vy,
-                                        real* vz,
-                                        real* DD, 
-                                        int* k_Q, 
-                                        real* QQ,
-                                        unsigned int numberOfBCnodes, 
-                                        real om1, 
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat, 
-                                        bool isEvenTimestep)
+__global__ void QVelDevice27(
+    int inx,
+    int iny,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5270,24 +5279,24 @@ __global__ void QVelDevice27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -5358,32 +5367,32 @@ __global__ void QVelDevice27(int inx,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -5408,63 +5417,63 @@ __global__ void QVelDevice27(int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -5723,19 +5732,20 @@ __global__ void QVelDevice27(int inx,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void PropellerBC(unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       real* rho,
-                                       real* ux,
-                                       real* uy,
-                                       real* uz,
-                                       int* k_Q, 
-									   unsigned int size_Prop,
-                                       unsigned int size_Mat,
-                                       unsigned int* bcMatD,
-                                       real* DD,
-                                       bool EvenOrOdd)
+__global__ void PropellerBC(
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rho,
+    real* ux,
+    real* uy,
+    real* uz,
+    int* k_Q, 
+    unsigned int size_Prop,
+    unsigned long long numberOfLBnodes,
+    unsigned int* bcMatD,
+    real* DD,
+    bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5754,63 +5764,63 @@ __global__ void PropellerBC(unsigned int* neighborX,
         Distributions27 D;
         if (EvenOrOdd==true)
         {
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
         }
         else
         {
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
         }
         //////////////////////////////////////////////////////////////////////////
 		unsigned int KQK = k_Q[k];
@@ -5859,58 +5869,58 @@ __global__ void PropellerBC(unsigned int* neighborX,
 		f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO;
 
 		f_ZERO= (D.f[DIR_000])[kzero];
-		f_E   = (D.f[DIR_P00   ])[ke   ];
-		f_W   = (D.f[DIR_M00   ])[kw   ];
-		f_N   = (D.f[DIR_0P0   ])[kn   ];
-		f_S   = (D.f[DIR_0M0   ])[ks   ];
-		f_T   = (D.f[DIR_00P   ])[kt   ];
-		f_B   = (D.f[DIR_00M   ])[kb   ];
-		f_NE  = (D.f[DIR_PP0  ])[kne  ];
-		f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-		f_SE  = (D.f[DIR_PM0  ])[kse  ];
-		f_NW  = (D.f[DIR_MP0  ])[knw  ];
-		f_TE  = (D.f[DIR_P0P  ])[kte  ];
-		f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-		f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-		f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-		f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-		f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-		f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-		f_TS  = (D.f[DIR_0MP  ])[kts  ];
-		f_TNE = (D.f[DIR_PPP ])[ktne ];
-		f_BSW = (D.f[DIR_MMM ])[kbsw ];
-		f_BNE = (D.f[DIR_PPM ])[kbne ];
-		f_TSW = (D.f[DIR_MMP ])[ktsw ];
-		f_TSE = (D.f[DIR_PMP ])[ktse ];
-		f_BNW = (D.f[DIR_MPM ])[kbnw ];
-		f_BSE = (D.f[DIR_PMM ])[kbse ];
-		f_TNW = (D.f[DIR_MPP ])[ktnw ];
-		//f_W    = (D.f[DIR_P00   ])[ke   ];
-		//f_E    = (D.f[DIR_M00   ])[kw   ];
-		//f_S    = (D.f[DIR_0P0   ])[kn   ];
-		//f_N    = (D.f[DIR_0M0   ])[ks   ];
-		//f_B    = (D.f[DIR_00P   ])[kt   ];
-		//f_T    = (D.f[DIR_00M   ])[kb   ];
-		//f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		//f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		//f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		//f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		//f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		//f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		//f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		//f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		//f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		//f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		//f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		//f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		//f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		//f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		//f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		//f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		//f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		//f_TSE  = (D.f[DIR_MPM ])[kbnw ];
-		//f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		//f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+		f_E   = (D.f[DIR_P00])[ke   ];
+		f_W   = (D.f[DIR_M00])[kw   ];
+		f_N   = (D.f[DIR_0P0])[kn   ];
+		f_S   = (D.f[DIR_0M0])[ks   ];
+		f_T   = (D.f[DIR_00P])[kt   ];
+		f_B   = (D.f[DIR_00M])[kb   ];
+		f_NE  = (D.f[DIR_PP0])[kne  ];
+		f_SW  = (D.f[DIR_MM0])[ksw  ];
+		f_SE  = (D.f[DIR_PM0])[kse  ];
+		f_NW  = (D.f[DIR_MP0])[knw  ];
+		f_TE  = (D.f[DIR_P0P])[kte  ];
+		f_BW  = (D.f[DIR_M0M])[kbw  ];
+		f_BE  = (D.f[DIR_P0M])[kbe  ];
+		f_TW  = (D.f[DIR_M0P])[ktw  ];
+		f_TN  = (D.f[DIR_0PP])[ktn  ];
+		f_BS  = (D.f[DIR_0MM])[kbs  ];
+		f_BN  = (D.f[DIR_0PM])[kbn  ];
+		f_TS  = (D.f[DIR_0MP])[kts  ];
+		f_TNE = (D.f[DIR_PPP])[ktne ];
+		f_BSW = (D.f[DIR_MMM])[kbsw ];
+		f_BNE = (D.f[DIR_PPM])[kbne ];
+		f_TSW = (D.f[DIR_MMP])[ktsw ];
+		f_TSE = (D.f[DIR_PMP])[ktse ];
+		f_BNW = (D.f[DIR_MPM])[kbnw ];
+		f_BSE = (D.f[DIR_PMM])[kbse ];
+		f_TNW = (D.f[DIR_MPP])[ktnw ];
+		//f_W    = (D.f[DIR_P00])[ke   ];
+		//f_E    = (D.f[DIR_M00])[kw   ];
+		//f_S    = (D.f[DIR_0P0])[kn   ];
+		//f_N    = (D.f[DIR_0M0])[ks   ];
+		//f_B    = (D.f[DIR_00P])[kt   ];
+		//f_T    = (D.f[DIR_00M])[kb   ];
+		//f_SW   = (D.f[DIR_PP0])[kne  ];
+		//f_NE   = (D.f[DIR_MM0])[ksw  ];
+		//f_NW   = (D.f[DIR_PM0])[kse  ];
+		//f_SE   = (D.f[DIR_MP0])[knw  ];
+		//f_BW   = (D.f[DIR_P0P])[kte  ];
+		//f_TE   = (D.f[DIR_M0M])[kbw  ];
+		//f_TW   = (D.f[DIR_P0M])[kbe  ];
+		//f_BE   = (D.f[DIR_M0P])[ktw  ];
+		//f_BS   = (D.f[DIR_0PP])[ktn  ];
+		//f_TN   = (D.f[DIR_0MM])[kbs  ];
+		//f_TS   = (D.f[DIR_0PM])[kbn  ];
+		//f_BN   = (D.f[DIR_0MP])[kts  ];
+		//f_BSW  = (D.f[DIR_PPP])[ktne ];
+		//f_TNE  = (D.f[DIR_MMM])[kbsw ];
+		//f_TSW  = (D.f[DIR_PPM])[kbne ];
+		//f_BNE  = (D.f[DIR_MMP])[ktsw ];
+		//f_BNW  = (D.f[DIR_PMP])[ktse ];
+		//f_TSE  = (D.f[DIR_MPM])[kbnw ];
+		//f_TNW  = (D.f[DIR_PMM])[kbse ];
+		//f_BSE  = (D.f[DIR_MPP])[ktnw ];
 		//////////////////////////////////////////////////////////////////////////////////
 		real vxo1, vxo2, vxo3, drho;
 		drho   =  /*zero;*/f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -5992,88 +6002,88 @@ __global__ void PropellerBC(unsigned int* neighborX,
          f_TNW  = f_TNW  + ((c1o1+drho) * (-  c1o216*(c3o1*(-vxo1+vxo2+vxo3)+c9o2*(-vxo1+vxo2+vxo3)*(-vxo1+vxo2+vxo3)-cusq) +   c1o216*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq2)));
 
 		(D.f[DIR_000])[kzero] =  f_ZERO;
-        (D.f[DIR_P00   ])[ke   ] =  f_E   ;	// f_W   ;//    	
-        (D.f[DIR_M00   ])[kw   ] =  f_W   ;	// f_E   ;//    	
-        (D.f[DIR_0P0   ])[kn   ] =  f_N   ;	// f_S   ;//    	
-        (D.f[DIR_0M0   ])[ks   ] =  f_S   ;	// f_N   ;//    	
-        (D.f[DIR_00P   ])[kt   ] =  f_T   ;	// f_B   ;//    	
-        (D.f[DIR_00M   ])[kb   ] =  f_B   ;	// f_T   ;//    	
-        (D.f[DIR_PP0  ])[kne  ] =  f_NE  ;	// f_SW  ;//    	
-        (D.f[DIR_MM0  ])[ksw  ] =  f_SW  ;	// f_NE  ;//    	
-        (D.f[DIR_PM0  ])[kse  ] =  f_SE  ;	// f_NW  ;//    	
-        (D.f[DIR_MP0  ])[knw  ] =  f_NW  ;	// f_SE  ;//    	
-        (D.f[DIR_P0P  ])[kte  ] =  f_TE  ;	// f_BW  ;//    	
-        (D.f[DIR_M0M  ])[kbw  ] =  f_BW  ;	// f_TE  ;//    	
-        (D.f[DIR_P0M  ])[kbe  ] =  f_BE  ;	// f_TW  ;//    	
-        (D.f[DIR_M0P  ])[ktw  ] =  f_TW  ;	// f_BE  ;//    	
-        (D.f[DIR_0PP  ])[ktn  ] =  f_TN  ;	// f_BS  ;//    	
-        (D.f[DIR_0MM  ])[kbs  ] =  f_BS  ;	// f_TN  ;//    	
-        (D.f[DIR_0PM  ])[kbn  ] =  f_BN  ;	// f_TS  ;//    	
-        (D.f[DIR_0MP  ])[kts  ] =  f_TS  ;	// f_BN  ;//    	
-        (D.f[DIR_PPP ])[ktne ] =  f_TNE ;	// f_BSW ;//    	
-        (D.f[DIR_MMM ])[kbsw ] =  f_BSW ;	// f_BNE ;//    	
-        (D.f[DIR_PPM ])[kbne ] =  f_BNE ;	// f_BNW ;//    	
-        (D.f[DIR_MMP ])[ktsw ] =  f_TSW ;	// f_BSE ;//    	
-        (D.f[DIR_PMP ])[ktse ] =  f_TSE ;	// f_TSW ;//    	
-        (D.f[DIR_MPM ])[kbnw ] =  f_BNW ;	// f_TNE ;//    	
-        (D.f[DIR_PMM ])[kbse ] =  f_BSE ;	// f_TNW ;//    	
-        (D.f[DIR_MPP ])[ktnw ] =  f_TNW ;	// f_TSE ;//    	
+        (D.f[DIR_P00])[ke   ] =  f_E   ;	// f_W   ;//    	
+        (D.f[DIR_M00])[kw   ] =  f_W   ;	// f_E   ;//    	
+        (D.f[DIR_0P0])[kn   ] =  f_N   ;	// f_S   ;//    	
+        (D.f[DIR_0M0])[ks   ] =  f_S   ;	// f_N   ;//    	
+        (D.f[DIR_00P])[kt   ] =  f_T   ;	// f_B   ;//    	
+        (D.f[DIR_00M])[kb   ] =  f_B   ;	// f_T   ;//    	
+        (D.f[DIR_PP0])[kne  ] =  f_NE  ;	// f_SW  ;//    	
+        (D.f[DIR_MM0])[ksw  ] =  f_SW  ;	// f_NE  ;//    	
+        (D.f[DIR_PM0])[kse  ] =  f_SE  ;	// f_NW  ;//    	
+        (D.f[DIR_MP0])[knw  ] =  f_NW  ;	// f_SE  ;//    	
+        (D.f[DIR_P0P])[kte  ] =  f_TE  ;	// f_BW  ;//    	
+        (D.f[DIR_M0M])[kbw  ] =  f_BW  ;	// f_TE  ;//    	
+        (D.f[DIR_P0M])[kbe  ] =  f_BE  ;	// f_TW  ;//    	
+        (D.f[DIR_M0P])[ktw  ] =  f_TW  ;	// f_BE  ;//    	
+        (D.f[DIR_0PP])[ktn  ] =  f_TN  ;	// f_BS  ;//    	
+        (D.f[DIR_0MM])[kbs  ] =  f_BS  ;	// f_TN  ;//    	
+        (D.f[DIR_0PM])[kbn  ] =  f_BN  ;	// f_TS  ;//    	
+        (D.f[DIR_0MP])[kts  ] =  f_TS  ;	// f_BN  ;//    	
+        (D.f[DIR_PPP])[ktne ] =  f_TNE ;	// f_BSW ;//    	
+        (D.f[DIR_MMM])[kbsw ] =  f_BSW ;	// f_BNE ;//    	
+        (D.f[DIR_PPM])[kbne ] =  f_BNE ;	// f_BNW ;//    	
+        (D.f[DIR_MMP])[ktsw ] =  f_TSW ;	// f_BSE ;//    	
+        (D.f[DIR_PMP])[ktse ] =  f_TSE ;	// f_TSW ;//    	
+        (D.f[DIR_MPM])[kbnw ] =  f_BNW ;	// f_TNE ;//    	
+        (D.f[DIR_PMM])[kbse ] =  f_BSE ;	// f_TNW ;//    	
+        (D.f[DIR_MPP])[ktnw ] =  f_TNW ;	// f_TSE ;//    	
 
 		//////////////////////////////////////////////////////////////////////////
         ////(D.f[DIR_000])[kzero] =   c8over27* (drho-cu_sq);
-        //(D.f[DIR_P00   ])[ke   ] =   three*c2over27* ( vx1        );		//six
-        //(D.f[DIR_M00   ])[kw   ] =   three*c2over27* (-vx1        );		//six
-        //(D.f[DIR_0P0   ])[kn   ] =   three*c2over27* (     vx2    );		//six
-        //(D.f[DIR_0M0   ])[ks   ] =   three*c2over27* (    -vx2    );		//six
-        //(D.f[DIR_00P   ])[kt   ] =   three*c2over27* (         vx3);		//six
-        //(D.f[DIR_00M   ])[kb   ] =   three*c2over27* (        -vx3);		//six
-        //(D.f[DIR_PP0  ])[kne  ] =   three*c1over54* ( vx1+vx2    );		//six
-        //(D.f[DIR_MM0  ])[ksw  ] =   three*c1over54* (-vx1-vx2    );		//six
-        //(D.f[DIR_PM0  ])[kse  ] =   three*c1over54* ( vx1-vx2    );		//six
-        //(D.f[DIR_MP0  ])[knw  ] =   three*c1over54* (-vx1+vx2    );		//six
-        //(D.f[DIR_P0P  ])[kte  ] =   three*c1over54* ( vx1    +vx3);		//six
-        //(D.f[DIR_M0M  ])[kbw  ] =   three*c1over54* (-vx1    -vx3);		//six
-        //(D.f[DIR_P0M  ])[kbe  ] =   three*c1over54* ( vx1    -vx3);		//six
-        //(D.f[DIR_M0P  ])[ktw  ] =   three*c1over54* (-vx1    +vx3);		//six
-        //(D.f[DIR_0PP  ])[ktn  ] =   three*c1over54* (     vx2+vx3);		//six
-        //(D.f[DIR_0MM  ])[kbs  ] =   three*c1over54* (    -vx2-vx3);		//six
-        //(D.f[DIR_0PM  ])[kbn  ] =   three*c1over54* (     vx2-vx3);		//six
-        //(D.f[DIR_0MP  ])[kts  ] =   three*c1over54* (    -vx2+vx3);		//six
-        //(D.f[DIR_PPP ])[ktne ] =   three*c1over216*( vx1+vx2+vx3);		//six
-        //(D.f[DIR_MMM ])[kbsw ] =   three*c1over216*(-vx1-vx2-vx3);		//six
-        //(D.f[DIR_PPM ])[kbne ] =   three*c1over216*( vx1+vx2-vx3);		//six
-        //(D.f[DIR_MMP ])[ktsw ] =   three*c1over216*(-vx1-vx2+vx3);		//six
-        //(D.f[DIR_PMP ])[ktse ] =   three*c1over216*( vx1-vx2+vx3);		//six
-        //(D.f[DIR_MPM ])[kbnw ] =   three*c1over216*(-vx1+vx2-vx3);		//six
-        //(D.f[DIR_PMM ])[kbse ] =   three*c1over216*( vx1-vx2-vx3);		//six
-        //(D.f[DIR_MPP ])[ktnw ] =   three*c1over216*(-vx1+vx2+vx3);		//six
+        //(D.f[DIR_P00])[ke   ] =   three*c2over27* ( vx1        );		//six
+        //(D.f[DIR_M00])[kw   ] =   three*c2over27* (-vx1        );		//six
+        //(D.f[DIR_0P0])[kn   ] =   three*c2over27* (     vx2    );		//six
+        //(D.f[DIR_0M0])[ks   ] =   three*c2over27* (    -vx2    );		//six
+        //(D.f[DIR_00P])[kt   ] =   three*c2over27* (         vx3);		//six
+        //(D.f[DIR_00M])[kb   ] =   three*c2over27* (        -vx3);		//six
+        //(D.f[DIR_PP0])[kne  ] =   three*c1over54* ( vx1+vx2    );		//six
+        //(D.f[DIR_MM0])[ksw  ] =   three*c1over54* (-vx1-vx2    );		//six
+        //(D.f[DIR_PM0])[kse  ] =   three*c1over54* ( vx1-vx2    );		//six
+        //(D.f[DIR_MP0])[knw  ] =   three*c1over54* (-vx1+vx2    );		//six
+        //(D.f[DIR_P0P])[kte  ] =   three*c1over54* ( vx1    +vx3);		//six
+        //(D.f[DIR_M0M])[kbw  ] =   three*c1over54* (-vx1    -vx3);		//six
+        //(D.f[DIR_P0M])[kbe  ] =   three*c1over54* ( vx1    -vx3);		//six
+        //(D.f[DIR_M0P])[ktw  ] =   three*c1over54* (-vx1    +vx3);		//six
+        //(D.f[DIR_0PP])[ktn  ] =   three*c1over54* (     vx2+vx3);		//six
+        //(D.f[DIR_0MM])[kbs  ] =   three*c1over54* (    -vx2-vx3);		//six
+        //(D.f[DIR_0PM])[kbn  ] =   three*c1over54* (     vx2-vx3);		//six
+        //(D.f[DIR_0MP])[kts  ] =   three*c1over54* (    -vx2+vx3);		//six
+        //(D.f[DIR_PPP])[ktne ] =   three*c1over216*( vx1+vx2+vx3);		//six
+        //(D.f[DIR_MMM])[kbsw ] =   three*c1over216*(-vx1-vx2-vx3);		//six
+        //(D.f[DIR_PPM])[kbne ] =   three*c1over216*( vx1+vx2-vx3);		//six
+        //(D.f[DIR_MMP])[ktsw ] =   three*c1over216*(-vx1-vx2+vx3);		//six
+        //(D.f[DIR_PMP])[ktse ] =   three*c1over216*( vx1-vx2+vx3);		//six
+        //(D.f[DIR_MPM])[kbnw ] =   three*c1over216*(-vx1+vx2-vx3);		//six
+        //(D.f[DIR_PMM])[kbse ] =   three*c1over216*( vx1-vx2-vx3);		//six
+        //(D.f[DIR_MPP])[ktnw ] =   three*c1over216*(-vx1+vx2+vx3);		//six
         //(D.f[DIR_000])[kzero] =   c8over27* (drho-cu_sq);
-        //(D.f[DIR_P00   ])[ke   ] =   c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
-        //(D.f[DIR_M00   ])[kw   ] =   c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
-        //(D.f[DIR_0P0   ])[kn   ] =   c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
-        //(D.f[DIR_0M0   ])[ks   ] =   c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
-        //(D.f[DIR_00P   ])[kt   ] =   c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
-        //(D.f[DIR_00M   ])[kb   ] =   c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
-        //(D.f[DIR_PP0  ])[kne  ] =   c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-        //(D.f[DIR_MM0  ])[ksw  ] =   c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-        //(D.f[DIR_PM0  ])[kse  ] =   c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-        //(D.f[DIR_MP0  ])[knw  ] =   c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-        //(D.f[DIR_P0P  ])[kte  ] =   c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-        //(D.f[DIR_M0M  ])[kbw  ] =   c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-        //(D.f[DIR_P0M  ])[kbe  ] =   c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-        //(D.f[DIR_M0P  ])[ktw  ] =   c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-        //(D.f[DIR_0PP  ])[ktn  ] =   c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-        //(D.f[DIR_0MM  ])[kbs  ] =   c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-        //(D.f[DIR_0PM  ])[kbn  ] =   c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-        //(D.f[DIR_0MP  ])[kts  ] =   c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-        //(D.f[DIR_PPP ])[ktne ] =   c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-        //(D.f[DIR_MMM ])[kbsw ] =   c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-        //(D.f[DIR_PPM ])[kbne ] =   c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-        //(D.f[DIR_MMP ])[ktsw ] =   c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-        //(D.f[DIR_PMP ])[ktse ] =   c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-        //(D.f[DIR_MPM ])[kbnw ] =   c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-        //(D.f[DIR_PMM ])[kbse ] =   c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-        //(D.f[DIR_MPP ])[ktnw ] =   c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+        //(D.f[DIR_P00])[ke   ] =   c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
+        //(D.f[DIR_M00])[kw   ] =   c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
+        //(D.f[DIR_0P0])[kn   ] =   c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
+        //(D.f[DIR_0M0])[ks   ] =   c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
+        //(D.f[DIR_00P])[kt   ] =   c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
+        //(D.f[DIR_00M])[kb   ] =   c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
+        //(D.f[DIR_PP0])[kne  ] =   c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+        //(D.f[DIR_MM0])[ksw  ] =   c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+        //(D.f[DIR_PM0])[kse  ] =   c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+        //(D.f[DIR_MP0])[knw  ] =   c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+        //(D.f[DIR_P0P])[kte  ] =   c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+        //(D.f[DIR_M0M])[kbw  ] =   c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+        //(D.f[DIR_P0M])[kbe  ] =   c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+        //(D.f[DIR_M0P])[ktw  ] =   c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+        //(D.f[DIR_0PP])[ktn  ] =   c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+        //(D.f[DIR_0MM])[kbs  ] =   c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+        //(D.f[DIR_0PM])[kbn  ] =   c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+        //(D.f[DIR_0MP])[kts  ] =   c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+        //(D.f[DIR_PPP])[ktne ] =   c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+        //(D.f[DIR_MMM])[kbsw ] =   c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+        //(D.f[DIR_PPM])[kbne ] =   c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+        //(D.f[DIR_MMP])[ktsw ] =   c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+        //(D.f[DIR_PMP])[ktse ] =   c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+        //(D.f[DIR_MPM])[kbnw ] =   c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+        //(D.f[DIR_PMM])[kbse ] =   c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+        //(D.f[DIR_MPP])[ktnw ] =   c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 		}
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
index 16028e2f9f87716f43ed60f82ed513289e381b7c..cbb892296322bc164241ad18c8ab63201d34647e 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
@@ -23,7 +23,7 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	real* veloZ,
 	real* DDStart,
 	real* turbulentViscosity,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd)
@@ -39,7 +39,7 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -50,63 +50,63 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
index d48fa80fd14ce15f4a380ed46403654b43c805e8..d2fe5935af9b2d3ad78f492e3a9d182873d20808 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
@@ -20,69 +20,69 @@ __global__ void WallFunction27(
 										  unsigned int* neighborX,
 										  unsigned int* neighborY,
 										  unsigned int* neighborZ,
-										  unsigned int size_Mat, 
+										  unsigned long long numberOfLBnodes, 
 										  bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -107,24 +107,24 @@ __global__ void WallFunction27(
       //      *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
       //      *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //      *q_dirBSE, *q_dirBNW; 
-      //q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      //q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      //q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      //q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      //q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      //q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      //q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      //q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      //q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      //q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      //q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      //q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -167,32 +167,32 @@ __global__ void WallFunction27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       // real vx2, vx3, feq, q;
       real vx1, drho;
@@ -234,63 +234,63 @@ __global__ void WallFunction27(
    //   //////////////////////////////////////////////////////////////////////////
    //   if (isEvenTimestep==false)
    //   {
-   //      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   //      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   //      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   //      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   //      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   //      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   //      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   //      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   //      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   //      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   //      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   //      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   //      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   //      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   //      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   //      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   //      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   //      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   //      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   //      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    //   } 
    //   else
    //   {
-   //      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+   //      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+   //      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+   //      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+   //      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+   //      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+   //      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+   //      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+   //      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+   //      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+   //      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+   //      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+   //      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+   //      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+   //      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+   //      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+   //      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+   //      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+   //      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+   //      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    //   }
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   //Test
diff --git a/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp b/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp
index 2c85de9e3ec57d50a66fde2c49d3e703676fbf04..508e4498c36d352761c3ecaf24abaa52a5f84bbe 100644
--- a/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp
+++ b/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp
@@ -45,26 +45,44 @@ void initLattice(SPtr<Parameter> para, SPtr<PreProcessor> preProcessor, SPtr<Cud
         preProcessor->init(para, lev);
 
         CalcMacCompSP27(
-            para->getParD(lev)->velocityX, para->getParD(lev)->velocityY, para->getParD(lev)->velocityZ, para->getParD(lev)->rho,
-            para->getParD(lev)->pressure, para->getParD(lev)->typeOfGridNode, para->getParD(lev)->neighborX,
-            para->getParD(lev)->neighborY, para->getParD(lev)->neighborZ, para->getParD(lev)->numberOfNodes,
-            para->getParD(lev)->numberofthreads, para->getParD(lev)->distributions.f[0], para->getParD(lev)->isEvenTimestep);
+            para->getParD(lev)->velocityX, 
+            para->getParD(lev)->velocityY, 
+            para->getParD(lev)->velocityZ, 
+            para->getParD(lev)->rho,
+            para->getParD(lev)->pressure, 
+            para->getParD(lev)->typeOfGridNode, 
+            para->getParD(lev)->neighborX,
+            para->getParD(lev)->neighborY, 
+            para->getParD(lev)->neighborZ, 
+            para->getParD(lev)->numberOfNodes,
+            para->getParD(lev)->numberofthreads, 
+            para->getParD(lev)->distributions.f[0], 
+            para->getParD(lev)->isEvenTimestep);
 
         if (para->getCalcMedian()) {
             constexpr uint tdiff = 1;
-            CalcMacMedSP27(para->getParD(lev)->vx_SP_Med, para->getParD(lev)->vy_SP_Med, para->getParD(lev)->vz_SP_Med,
-                           para->getParD(lev)->rho_SP_Med, para->getParD(lev)->press_SP_Med, para->getParD(lev)->typeOfGridNode,
-                           para->getParD(lev)->neighborX, para->getParD(lev)->neighborY,
-                           para->getParD(lev)->neighborZ, tdiff, para->getParD(lev)->numberOfNodes,
-                           para->getParD(lev)->numberofthreads, para->getParD(lev)->isEvenTimestep);
+            CalcMacMedSP27(
+                para->getParD(lev)->vx_SP_Med, 
+                para->getParD(lev)->vy_SP_Med, 
+                para->getParD(lev)->vz_SP_Med,
+                para->getParD(lev)->rho_SP_Med, 
+                para->getParD(lev)->press_SP_Med, 
+                para->getParD(lev)->typeOfGridNode,
+                para->getParD(lev)->neighborX, 
+                para->getParD(lev)->neighborY,
+                para->getParD(lev)->neighborZ, 
+                tdiff, 
+                para->getParD(lev)->numberOfNodes,
+                para->getParD(lev)->numberofthreads, 
+                para->getParD(lev)->isEvenTimestep);
         }
         // advection - diffusion
         if (para->getDiffOn()) {
 
             cudaMemoryManager->cudaAllocConcentration(lev);
 
-            for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++) {
-                para->getParH(lev)->Conc[i] = para->getTemperatureInit();
+            for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++) {
+                para->getParH(lev)->Conc[index] = para->getTemperatureInit();
             }
             initTemperatur(para.get(), cudaMemoryManager.get(), lev);
         }
diff --git a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
index 4e5a862d3fd1ed19109073aae0fe4c731f7f3e91..6eaa0b17653aaf5257c00e674c87e2844c26cf5d 100644
--- a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
@@ -3,6 +3,7 @@
 #include "Parameter/Parameter.h"
 
 #include <basics/utilities/UbFileInputASCII.h>
+
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
@@ -169,7 +170,7 @@ void PositionReader::definePropellerQs(Parameter* para)
 	//////////////////////////////////////////////////////////////////
 	for(uint u=0; u<para->getParH(para->getFine())->propellerBC.numberOfBCnodes; u++)
 	{
-		for (int dir = DIR_P00; dir<=DIR_MMM; dir++)
+		for (size_t dir = DIR_P00; dir<=DIR_MMM; dir++)
 		{
 			if ((dir==DIR_P00)  || 
 				(dir==DIR_PP0) || (dir==DIR_PM0) || (dir==DIR_P0P) || (dir==DIR_P0M) ||
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
index 9f9f7539bc5a1e28612d956ca32234c5a3589f8a..50b4460d774010ea7d7b98cfa6fa505cdfeb88c2 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
@@ -3,8 +3,11 @@
 
 #include <vector>
 
+#include "LBM/LB.h" 
+
 #include "Kernel/Utilities/KernelGroup.h"
 #include "PreProcessor/PreProcessorType.h"
+#include "Parameter/CudaStreamManager.h"
 
 #include <helper_cuda.h>
 
@@ -13,7 +16,7 @@ class Kernel
 public:
     virtual ~Kernel()  = default;
     virtual void run() = 0;
-    virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0; //if stream == -1: run on default stream
+    virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex = CudaStreamIndex::Legacy) = 0; // streamIndex defaults to CudaStreamIndex::Legacy; parameter name matches the KernelImp override
 
     virtual bool checkParameter()                                = 0;
     virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
index 630aaf7339afc2907ab6bfbf65bd5fc55f75e215..9bd3945aa81147d03be2b1eac3ddec7c24d71532 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
@@ -1,9 +1,11 @@
 #include "KernelImp.h"
 
+#include "LBM/LB.h" 
+
 #include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h"
 
 
-void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream)
+void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex)
 {
     printf("Method not implemented for this Kernel \n");
 }
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index 0141ddda7e9579cc84148d26727ed81c084ea0c5..a96c2c123472ca33f635273e06a5bf36a745654d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -1,6 +1,8 @@
 #ifndef KERNEL_IMP_H
 #define KERNEL_IMP_H
 
+#include "LBM/LB.h" 
+
 #include "Kernel.h"
 
 #include <memory>
@@ -9,12 +11,12 @@
 
 class CheckParameterStrategy;
 class Parameter;
-
+class CudaStreamManager; 
 class KernelImp : public Kernel
 {
 public:
     virtual void run() = 0;
-    virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1);
+    virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex=CudaStreamIndex::Legacy);
 
     bool checkParameter();
     std::vector<PreProcessorType> getPreProcessorTypes();
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
index 51b9e4537fa0857e9302aa638ae7729fa9adcdbe..d4d6307f688da4c8fa37c54fb4958681d5ec4941 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
@@ -2,6 +2,7 @@
 
 #include "ADComp27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADComp27> ADComp27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADComp27> ADComp27::getNewInstance(std::shared_ptr<Parameter> pa
 
 void ADComp27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_KERNEL_AD_COMP_27 << < grid, threads >> >(	para->getParD(level)->diffusivity,
-												para->getParD(level)->typeOfGridNode,
-												para->getParD(level)->neighborX,
-												para->getParD(level)->neighborY,
-												para->getParD(level)->neighborZ,
-												para->getParD(level)->distributions.f[0],
-												para->getParD(level)->distributionsAD27.f[0],
-												para->getParD(level)->numberOfNodes,
-												para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_ThS27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_KERNEL_AD_COMP_27<<< grid.grid, grid.threads >>>(
+        para->getParD(level)->diffusivity,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->distributionsAD27.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_KERNEL_AD_COMP_27 execution failed");
 }
 
 ADComp27::ADComp27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
index b4c1236300bbb49fe2df1b3f458f506e989e142b..40adfff91713b7d6db1e861be9282d1f38516c22 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
@@ -38,125 +38,125 @@ __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions27 D27;
 			if (EvenOrOdd == true)
 			{
-				D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
index ab9b0c444513455e0498d79614575e87c2afb6a0..3ee06a1e9ea77c8443d94f44ea54d11ffe7304ac 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
@@ -2,6 +2,7 @@
 
 #include "ADComp7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADComp7> ADComp7::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADComp7> ADComp7::getNewInstance(std::shared_ptr<Parameter> para
 
 void ADComp7::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_AD_Comp_7 << < grid, threads >> >(	para->getParD(level)->diffusivity,
-											para->getParD(level)->typeOfGridNode,
-											para->getParD(level)->neighborX,
-											para->getParD(level)->neighborY,
-											para->getParD(level)->neighborZ,
-											para->getParD(level)->distributions.f[0], 
-											para->getParD(level)->distributionsAD7.f[0], 
-											para->getParD(level)->numberOfNodes,
-											para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_ThS7 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_AD_Comp_7<<< grid.grid, grid.threads >>>(
+        para->getParD(level)->diffusivity,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], 
+        para->getParD(level)->distributionsAD7.f[0], 
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_AD_Comp_7 execution failed");
 }
 
 ADComp7::ADComp7(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
index 52ab9ba6e968ec2293f0a1c4959323c43f328206..ddaed84703640cd9c7d12d142ccc1bf8f9ea7efc 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
@@ -39,63 +39,63 @@ __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions7 D7;
@@ -157,33 +157,33 @@ __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 			real fTNE = (D.f[DIR_MMM])[kbsw];
 			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
 			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
-										   //real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-										   //real fW    =  (D.f[DIR_M00   ])[kw ];
-										   //real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-										   //real fS    =  (D.f[DIR_0M0   ])[ks ];
-										   //real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-										   //real fB    =  (D.f[DIR_00M   ])[kb ];
-										   //real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-										   //real fSW   =  (D.f[DIR_MM0  ])[ksw];
-										   //real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-										   //real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-										   //real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-										   //real fBW   =  (D.f[DIR_M0M  ])[kbw];
-										   //real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-										   //real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-										   //real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-										   //real fBS   =  (D.f[DIR_0MM  ])[kbs];
-										   //real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-										   //real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+										   //real fE    =  (D.f[DIR_P00])[k  ];//ke
+										   //real fW    =  (D.f[DIR_M00])[kw ];
+										   //real fN    =  (D.f[DIR_0P0])[k  ];//kn
+										   //real fS    =  (D.f[DIR_0M0])[ks ];
+										   //real fT    =  (D.f[DIR_00P])[k  ];//kt
+										   //real fB    =  (D.f[DIR_00M])[kb ];
+										   //real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+										   //real fSW   =  (D.f[DIR_MM0])[ksw];
+										   //real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+										   //real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+										   //real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+										   //real fBW   =  (D.f[DIR_M0M])[kbw];
+										   //real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+										   //real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+										   //real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+										   //real fBS   =  (D.f[DIR_0MM])[kbs];
+										   //real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+										   //real fTS   =  (D.f[DIR_0MP])[ks ];//kts
 										   //real fZERO =  (D.f[DIR_000])[k  ];//kzero
-										   //real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-										   //real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-										   //real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-										   //real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-										   //real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-										   //real fBSW   = (D.f[DIR_MMM ])[kbsw];
-										   //real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-										   //real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+										   //real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+										   //real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+										   //real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+										   //real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+										   //real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+										   //real fBSW   = (D.f[DIR_MMM])[kbsw];
+										   //real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+										   //real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 			real f7ZERO = (D7.f[0])[k];
 			real f7E = (D7.f[1])[k];
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
index 4ad8a4678ae2e4025a90f639ae366311a247e4b3..f2a9feaa998b628fb782844d1a7d946317e5af5f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
@@ -2,6 +2,7 @@
 
 #include "ADIncomp27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADIncomp27> ADIncomp27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADIncomp27> ADIncomp27::getNewInstance(std::shared_ptr<Parameter
 
 void ADIncomp27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_AD_Incomp_27 << < grid, threads >> >(	para->getParD(level)->diffusivity, 
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX, 
-													para->getParD(level)->neighborY, 
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0], 
-													para->getParD(level)->distributionsAD27.f[0], 
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>(
+        para->getParD(level)->diffusivity, 
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX, 
+        para->getParD(level)->neighborY, 
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], 
+        para->getParD(level)->distributionsAD27.f[0], 
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
 }
 
 ADIncomp27::ADIncomp27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
index e686825ed100417110b02360876dec076553d7de..f9fdcee0f34106b05da0edc16e3fdd89f859752e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
@@ -13,7 +13,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD27,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -27,7 +27,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -38,125 +38,125 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			Distributions27 D27;
 			if (EvenOrOdd == true)
 			{
-				D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+				D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+				D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+				D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+				D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+				D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+				D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+				D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+				D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+				D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+				D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+				D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+				D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+				D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+				D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+				D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+				D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+				D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+				D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+				D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+				D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+				D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+				D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+				D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+				D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+				D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+				D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+				D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+				D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+				D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+				D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+				D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+				D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+				D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+				D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+				D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+				D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+				D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+				D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+				D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+				D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+				D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+				D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+				D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+				D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+				D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+				D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+				D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
+				D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+				D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+				D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+				D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+				D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -197,33 +197,33 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
 			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
-										   //real f27E    =  (D27.f[DIR_P00   ])[k  ];//ke
-										   //real f27W    =  (D27.f[DIR_M00   ])[kw ];
-										   //real f27N    =  (D27.f[DIR_0P0   ])[k  ];//kn
-										   //real f27S    =  (D27.f[DIR_0M0   ])[ks ];
-										   //real f27T    =  (D27.f[DIR_00P   ])[k  ];//kt
-										   //real f27B    =  (D27.f[DIR_00M   ])[kb ];
-										   //real f27NE   =  (D27.f[DIR_PP0  ])[k  ];//kne
-										   //real f27SW   =  (D27.f[DIR_MM0  ])[ksw];
-										   //real f27SE   =  (D27.f[DIR_PM0  ])[ks ];//kse
-										   //real f27NW   =  (D27.f[DIR_MP0  ])[kw ];//knw
-										   //real f27TE   =  (D27.f[DIR_P0P  ])[k  ];//kte
-										   //real f27BW   =  (D27.f[DIR_M0M  ])[kbw];
-										   //real f27BE   =  (D27.f[DIR_P0M  ])[kb ];//kbe
-										   //real f27TW   =  (D27.f[DIR_M0P  ])[kw ];//ktw
-										   //real f27TN   =  (D27.f[DIR_0PP  ])[k  ];//ktn
-										   //real f27BS   =  (D27.f[DIR_0MM  ])[kbs];
-										   //real f27BN   =  (D27.f[DIR_0PM  ])[kb ];//kbn
-										   //real f27TS   =  (D27.f[DIR_0MP  ])[ks ];//kts
+										   //real f27E    =  (D27.f[DIR_P00])[k  ];//ke
+										   //real f27W    =  (D27.f[DIR_M00])[kw ];
+										   //real f27N    =  (D27.f[DIR_0P0])[k  ];//kn
+										   //real f27S    =  (D27.f[DIR_0M0])[ks ];
+										   //real f27T    =  (D27.f[DIR_00P])[k  ];//kt
+										   //real f27B    =  (D27.f[DIR_00M])[kb ];
+										   //real f27NE   =  (D27.f[DIR_PP0])[k  ];//kne
+										   //real f27SW   =  (D27.f[DIR_MM0])[ksw];
+										   //real f27SE   =  (D27.f[DIR_PM0])[ks ];//kse
+										   //real f27NW   =  (D27.f[DIR_MP0])[kw ];//knw
+										   //real f27TE   =  (D27.f[DIR_P0P])[k  ];//kte
+										   //real f27BW   =  (D27.f[DIR_M0M])[kbw];
+										   //real f27BE   =  (D27.f[DIR_P0M])[kb ];//kbe
+										   //real f27TW   =  (D27.f[DIR_M0P])[kw ];//ktw
+										   //real f27TN   =  (D27.f[DIR_0PP])[k  ];//ktn
+										   //real f27BS   =  (D27.f[DIR_0MM])[kbs];
+										   //real f27BN   =  (D27.f[DIR_0PM])[kb ];//kbn
+										   //real f27TS   =  (D27.f[DIR_0MP])[ks ];//kts
 										   //real f27ZERO =  (D27.f[DIR_000])[k  ];//kzero
-										   //real f27TNE  =  (D27.f[DIR_PPP ])[k  ];//ktne
-										   //real f27TSW  =  (D27.f[DIR_MMP ])[ksw];//ktsw
-										   //real f27TSE  =  (D27.f[DIR_PMP ])[ks ];//ktse
-										   //real f27TNW  =  (D27.f[DIR_MPP ])[kw ];//ktnw
-										   //real f27BNE  =  (D27.f[DIR_PPM ])[kb ];//kbne
-										   //real f27BSW  =  (D27.f[DIR_MMM ])[kbsw];
-										   //real f27BSE  =  (D27.f[DIR_PMM ])[kbs];//kbse
-										   //real f27BNW  =  (D27.f[DIR_MPM ])[kbw];//kbnw
+										   //real f27TNE  =  (D27.f[DIR_PPP])[k  ];//ktne
+										   //real f27TSW  =  (D27.f[DIR_MMP])[ksw];//ktsw
+										   //real f27TSE  =  (D27.f[DIR_PMP])[ks ];//ktse
+										   //real f27TNW  =  (D27.f[DIR_MPP])[kw ];//ktnw
+										   //real f27BNE  =  (D27.f[DIR_PPM])[kb ];//kbne
+										   //real f27BSW  =  (D27.f[DIR_MMM])[kbsw];
+										   //real f27BSE  =  (D27.f[DIR_PMM])[kbs];//kbse
+										   //real f27BNW  =  (D27.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 										   //real vx1     =  ((fTNE-fBSW)+(fBNE-fTSW)+(fTSE-fBNW)+(fBSE-fTNW) +(fNE-fSW)+(fSE-fNW)+(fTE-fBW)+(fBE-fTW)+(fE-fW));
 										   //real vx2     =  ((fTNE-fBSW)+(fBNE-fTSW)+(fBNW-fTSE)+(fTNW-fBSE) +(fNE-fSW)+(fNW-fSE)+(fTN-fBS)+(fBN-fTS)+(fN-fS));
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
index a6d94de4fadb9a93a9e5fed63d87731b12ec2a07..3abee563f676910f422bba0930060c2a0b0c0e21 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
@@ -11,7 +11,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD27,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd);
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
index 27da776eb7612307fa4f9af2886594fc0c75d90b..d0c6a6a24ab4d0ebebee9324bdafa1f9e3db51b9 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
@@ -2,6 +2,7 @@
 
 #include "ADIncomp7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADIncomp7> ADIncomp7::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADIncomp7> ADIncomp7::getNewInstance(std::shared_ptr<Parameter>
 
 void ADIncomp7::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_AD_Incomp_7 << < grid, threads >> >(	para->getParD(level)->diffusivity, 
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX, 
-													para->getParD(level)->neighborY, 
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->distributionsAD7.f[0], 
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration built from this level's thread count and node count
+
+    LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>( // run the kernel on this level's device data
+        para->getParD(level)->diffusivity, 
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX, 
+        para->getParD(level)->neighborY, 
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // kernel parameter DDStart
+        para->getParD(level)->distributionsAD7.f[0], // kernel parameter DD7
+        para->getParD(level)->numberOfNodes, // kernel parameter numberOfLBnodes (unsigned long long)
+        para->getParD(level)->isEvenTimestep); // kernel parameter EvenOrOdd
+    getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed"); // presumably reports a pending CUDA error with this message; helper is not visible here
 }
 
 ADIncomp7::ADIncomp7(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
index d49b0b48d20d976076a52f804d485b68da55348e..e0bcc4e515b1b2ccf71f1050e2d572b60a40d94b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
@@ -13,7 +13,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD7,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -27,7 +27,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -38,85 +38,85 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			Distributions7 D7;
 			if (EvenOrOdd == true)
 			{
-				D7.f[0] = &DD7[0 * size_Mat];
-				D7.f[1] = &DD7[1 * size_Mat];
-				D7.f[2] = &DD7[2 * size_Mat];
-				D7.f[3] = &DD7[3 * size_Mat];
-				D7.f[4] = &DD7[4 * size_Mat];
-				D7.f[5] = &DD7[5 * size_Mat];
-				D7.f[6] = &DD7[6 * size_Mat];
+				D7.f[0] = &DD7[0 * numberOfLBnodes];
+				D7.f[1] = &DD7[1 * numberOfLBnodes];
+				D7.f[2] = &DD7[2 * numberOfLBnodes];
+				D7.f[3] = &DD7[3 * numberOfLBnodes];
+				D7.f[4] = &DD7[4 * numberOfLBnodes];
+				D7.f[5] = &DD7[5 * numberOfLBnodes];
+				D7.f[6] = &DD7[6 * numberOfLBnodes];
 			}
 			else
 			{
-				D7.f[0] = &DD7[0 * size_Mat];
-				D7.f[2] = &DD7[1 * size_Mat];
-				D7.f[1] = &DD7[2 * size_Mat];
-				D7.f[4] = &DD7[3 * size_Mat];
-				D7.f[3] = &DD7[4 * size_Mat];
-				D7.f[6] = &DD7[5 * size_Mat];
-				D7.f[5] = &DD7[6 * size_Mat];
+				D7.f[0] = &DD7[0 * numberOfLBnodes];
+				D7.f[2] = &DD7[1 * numberOfLBnodes];
+				D7.f[1] = &DD7[2 * numberOfLBnodes];
+				D7.f[4] = &DD7[3 * numberOfLBnodes];
+				D7.f[3] = &DD7[4 * numberOfLBnodes];
+				D7.f[6] = &DD7[5 * numberOfLBnodes];
+				D7.f[5] = &DD7[6 * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 			real fTNE = (D.f[DIR_MMM])[kbsw];
 			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
 			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
-										   //real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-										   //real fW    =  (D.f[DIR_M00   ])[kw ];
-										   //real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-										   //real fS    =  (D.f[DIR_0M0   ])[ks ];
-										   //real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-										   //real fB    =  (D.f[DIR_00M   ])[kb ];
-										   //real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-										   //real fSW   =  (D.f[DIR_MM0  ])[ksw];
-										   //real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-										   //real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-										   //real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-										   //real fBW   =  (D.f[DIR_M0M  ])[kbw];
-										   //real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-										   //real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-										   //real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-										   //real fBS   =  (D.f[DIR_0MM  ])[kbs];
-										   //real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-										   //real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+										   //real fE    =  (D.f[DIR_P00])[k  ];//ke
+										   //real fW    =  (D.f[DIR_M00])[kw ];
+										   //real fN    =  (D.f[DIR_0P0])[k  ];//kn
+										   //real fS    =  (D.f[DIR_0M0])[ks ];
+										   //real fT    =  (D.f[DIR_00P])[k  ];//kt
+										   //real fB    =  (D.f[DIR_00M])[kb ];
+										   //real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+										   //real fSW   =  (D.f[DIR_MM0])[ksw];
+										   //real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+										   //real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+										   //real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+										   //real fBW   =  (D.f[DIR_M0M])[kbw];
+										   //real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+										   //real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+										   //real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+										   //real fBS   =  (D.f[DIR_0MM])[kbs];
+										   //real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+										   //real fTS   =  (D.f[DIR_0MP])[ks ];//kts
 										   //real fZERO =  (D.f[DIR_000])[k  ];//kzero
-										   //real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-										   //real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-										   //real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-										   //real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-										   //real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-										   //real fBSW   = (D.f[DIR_MMM ])[kbsw];
-										   //real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-										   //real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+										   //real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+										   //real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+										   //real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+										   //real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+										   //real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+										   //real fBSW   = (D.f[DIR_MMM])[kbsw];
+										   //real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+										   //real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 			real f7ZERO = (D7.f[0])[k];
 			real f7E = (D7.f[1])[k];
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
index 25a17ddbd7038635a2beb2c39212822cbf762034..845ecda946a4e45678082b72b5c74dc96e5810c5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
@@ -11,7 +11,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD7,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd);
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
index d2f9f60890379d07ecc3d04f4a54d59a0754907a..8c99f3b030984aef6215d5479be4b321145ee54f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKCompSP27> BGKCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKCompSP27> BGKCompSP27::getNewInstance(std::shared_ptr<Paramet
 
 void BGKCompSP27::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_BGK_Comp_SP_27<<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_BGK_Comp_SP_27 execution failed");
 }
 
 BGKCompSP27::BGKCompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
index 09196d13e94a2404ba280e8a8e9394f0a79e8211..3bdb65c455bd67d66e8b35961f2fa7e1de45f763 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
@@ -38,63 +38,63 @@ __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
index beebda2437ca4e7385ab812b9106edabe213227e..a4b136d1c21b1e4c68432eef5e21ff8c968bdfec 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKPlusCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKPlusCompSP27> BGKPlusCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKPlusCompSP27> BGKPlusCompSP27::getNewInstance(std::shared_ptr
 
 void BGKPlusCompSP27::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Plus_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->distributions.f[0],
-															size_Mat,
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_Plus_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_BGK_Plus_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_BGK_Plus_Comp_SP_27 execution failed");
 }
 
 BGKPlusCompSP27::BGKPlusCompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
index 325f65ece9baddf88adc91baa753bdfc4bd0eced..1f44fee9ea8b20241f87bea6310c96db2b82d1c4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
@@ -38,63 +38,63 @@ __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -127,33 +127,33 @@ __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
index 3d7f6fb9a8980454ebc83c51c7dd8865688fa166..1107d343801f8ac3626b03a93ca92415217732ac 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
@@ -6,6 +6,7 @@
 #include "../RunLBMKernel.cuh"
 
 #include <lbm/BGK.h>
+#include <lbm/KernelParameter.h>
 
 
 namespace vf
@@ -31,15 +32,16 @@ BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level)
 
 void BGKUnified::run()
 {
-    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
-                                                 para->getParD(level)->typeOfGridNode,
-                                                 para->getParD(level)->neighborX,
-                                                 para->getParD(level)->neighborY,
-                                                 para->getParD(level)->neighborZ,
-                                                 para->getParD(level)->distributions.f[0],
-                                                 (int)para->getParD(level)->numberOfNodes,
-                                                 nullptr, /* forces not used in bgk kernel */
-                                                 para->getParD(level)->isEvenTimestep };
+    GPUKernelParameter kernelParameter{
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        (int)para->getParD(level)->numberOfNodes,
+        nullptr, /* forces not used in bgk kernel */
+        para->getParD(level)->isEvenTimestep };
 
     auto lambda = [] __device__(lbm::KernelParameter parameter) {
         return lbm::bgk(parameter);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
index eca3a9953024e44fd91e7f9f98956e4329574d09..dcfda06db462fd83120751a32a40365445d659ba 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CascadeCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CascadeCompSP27> CascadeCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CascadeCompSP27> CascadeCompSP27::getNewInstance(std::shared_ptr
 
 void CascadeCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cascade_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->distributions.f[0],
-															para->getParD(level)->numberOfNodes,
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cascade_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_Cascade_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cascade_Comp_SP_27 execution failed");
 }
 
 CascadeCompSP27::CascadeCompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
index 3f69fa47288343fbdd91e77dbb7f154501349098..af0a7c118191243c80c420856a70711a1fc17d2b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ]
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ]
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real rho = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 						   mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
index 3f45c7ea71c385f948eac2e052a8d970010c413d..7817c398285dda131401bd14c3ccdd8c119c5680 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantCompSP27> CumulantCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CumulantCompSP27> CumulantCompSP27::getNewInstance(std::shared_p
 
 void CumulantCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cum_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_Cum_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cum_Comp_SP_27 execution failed");
 }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
index ad2ffdf4170d98125e6758c0e2f548122093cea6..1dfab5846795e61509cdba28478fe6ce623983b5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
index 9a84df86e41b3fdff75c2ebf580813afc5ee3feb..1518dcc209de1edf8a88dae72c1f10c3d4666610 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
@@ -1,8 +1,8 @@
 #include "CumulantAll4CompSP27.h"
 
 #include "CumulantAll4CompSP27_Device.cuh"
-
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantAll4CompSP27> CumulantAll4CompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -11,36 +11,21 @@ std::shared_ptr<CumulantAll4CompSP27> CumulantAll4CompSP27::getNewInstance(std::
 
 void CumulantAll4CompSP27::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cumulant_D3Q27All4 << < grid, threads >> >(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->distributions.f[0],
-															size_Mat,
-															level,
-															para->getForcesDev(),
-                                                            para->getQuadricLimitersDev(),
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cumulant_D3Q27All4 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_Cumulant_D3Q27All4 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cumulant_D3Q27All4 execution failed");
 }
 
 CumulantAll4CompSP27::CumulantAll4CompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
index 681dbff2ba37a1e0de56341b39cc2dec791f656b..3593b41c4c62c8a8b19719e22e9d65d6b5fd987d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
@@ -42,63 +42,63 @@ __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -160,33 +160,33 @@ __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
index 1b6ba1a2278b68f085a4b7df699b7ca230811f39..5a480e5d9c97126e491655b4bbe2aeefef3e7161 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
@@ -12,7 +12,7 @@ std::shared_ptr<CumulantK15Comp> CumulantK15Comp::getNewInstance(std::shared_ptr
 void CumulantK15Comp::run()
 {
 	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 
 	int Grid = (size_Mat / numberOfThreads) + 1;
 	int Grid1, Grid2;
@@ -29,16 +29,17 @@ void CumulantK15Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_CumulantK15Comp <<< grid, threads >>>(para->getParD(level)->omega,
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													size_Mat,
-													level,
-													para->getForcesDev(),
-													para->getParD(level)->isEvenTimestep);
+	LB_Kernel_CumulantK15Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_CumulantK15Comp execution failed");
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
index 93d57d6c9871d66537f25b9188467d46e3b3d05c..f7fb1f0a6441cfc6f38ad9684fd5bc8dd1be7135 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
@@ -39,63 +39,63 @@ __global__ void LB_Kernel_CumulantK15Comp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ __global__ void LB_Kernel_CumulantK15Comp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
index 188984d001f89d72c967dd6390ca10ae5d2eab32..51876f30b8c8e37d8cb3355edde5dcf2b04675d0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantK15BulkComp_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK15BulkComp> CumulantK15BulkComp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,35 +11,20 @@ std::shared_ptr<CumulantK15BulkComp> CumulantK15BulkComp::getNewInstance(std::sh
 
 void CumulantK15BulkComp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK15BulkComp <<< grid, threads >>>(para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														level,
-														para->getForcesDev(),
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK15BulkComp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration built from this level's thread count and node count
+
+    LB_Kernel_CumulantK15BulkComp <<< grid.grid, grid.threads >>>( // launch the bulk K15 kernel with the dimensions held by CudaGrid
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // first entry of the distribution pointer array; NOTE(review): presumably the base the kernel offsets per direction - confirm
+        para->getParD(level)->numberOfNodes, // same node count that was used to size the launch
+        level,
+        para->getForcesDev(),
+        para->getParD(level)->isEvenTimestep); // NOTE(review): presumably selects the even/odd pointer layout inside the kernel - confirm
+    getLastCudaError("LB_Kernel_CumulantK15BulkComp execution failed"); // queried right after the launch; the string names the kernel that was just started
 }
 
 CumulantK15BulkComp::CumulantK15BulkComp(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
index d2a2f61df902cfd7c5ef52b09f8e7738a108615e..085775d324bf65d783afdd745c06429d697c3788 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
@@ -39,63 +39,63 @@ __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
index d28c077031ff9125d1cbc1187def1d1d8fe4d6e8..613464125bafc572fe7951b8c372e3455ea5b21d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantK15SpongeComp_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK15SpongeComp> CumulantK15SpongeComp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<CumulantK15SpongeComp> CumulantK15SpongeComp::getNewInstance(std
 
 void CumulantK15SpongeComp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK15SpongeComp <<< grid, threads >>>(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->coordinateX,
-															para->getParD(level)->coordinateY,
-															para->getParD(level)->coordinateZ,
-															para->getParD(level)->distributions.f[0],
-															para->getParD(level)->numberOfNodes,
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK15SpongeComp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration built from this level's thread count and node count
+
+    LB_Kernel_CumulantK15SpongeComp <<< grid.grid, grid.threads >>>( // launch the sponge K15 kernel with the dimensions held by CudaGrid
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->coordinateX, // node coordinates are passed only to this sponge variant (the bulk variant's launch has no coordinate arguments)
+        para->getParD(level)->coordinateY,
+        para->getParD(level)->coordinateZ,
+        para->getParD(level)->distributions.f[0], // first entry of the distribution pointer array; NOTE(review): presumably the base the kernel offsets per direction - confirm
+        para->getParD(level)->numberOfNodes, // same node count that was used to size the launch
+        para->getParD(level)->isEvenTimestep); // NOTE(review): presumably selects the even/odd pointer layout inside the kernel - confirm
+    getLastCudaError("LB_Kernel_CumulantK15SpongeComp execution failed"); // queried right after the launch; the string names the kernel that was just started
 }
 
 CumulantK15SpongeComp::CumulantK15SpongeComp(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
index c2144d324aa3378e8fc9fc5b511bbed385b48a84..13788e65e70eb30803111a39a70d39682648a006 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
index 0b72b46cf25f331172be4abb8dded6d8e5e2b9c5..24b0bbc6f43a63093da6b6dcb3ce401b8a614f75 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
@@ -30,15 +30,16 @@ CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int leve
 
 void CumulantK15Unified::run()
 {
-    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
-                                                 para->getParD(level)->typeOfGridNode,
-                                                 para->getParD(level)->neighborX,
-                                                 para->getParD(level)->neighborY,
-                                                 para->getParD(level)->neighborZ,
-                                                 para->getParD(level)->distributions.f[0],
-                                                 (int)para->getParD(level)->numberOfNodes,
-                                                 para->getParD(level)->forcing,
-                                                 para->getParD(level)->isEvenTimestep };
+    GPUKernelParameter kernelParameter{
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        (int)para->getParD(level)->numberOfNodes,
+        para->getParD(level)->forcing,
+        para->getParD(level)->isEvenTimestep };
 
     auto lambda = [] __device__(lbm::KernelParameter parameter) {
         return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK15);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
new file mode 100644
index 0000000000000000000000000000000000000000..ea3442fecca63fdcb45878d742a547ce492ab5c8
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
@@ -0,0 +1,140 @@
+#include "CumulantK17.h"
+#include <logger/Logger.h>
+#include "Parameter/Parameter.h"
+#include "Parameter/CudaStreamManager.h"
+#include "CumulantK17_Device.cuh"
+
+#include <cuda.h>
+
+template<TurbulenceModel turbulenceModel> // factory function: the constructors of CumulantK17 are private, so instances are created here
+std::shared_ptr< CumulantK17<turbulenceModel> > CumulantK17<turbulenceModel>::getNewInstance(std::shared_ptr<Parameter> para, int level)
+{
+    return std::shared_ptr<CumulantK17>(new CumulantK17(para, level)); // CumulantK17 is the injected class name, i.e. CumulantK17<turbulenceModel>
+}
+
+template<TurbulenceModel turbulenceModel> // run(): one kernel launch with both bool template flags false and no explicit CUDA stream
+void CumulantK17<turbulenceModel>::run()
+{
+    LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads >>>(   para->getParD(level)->omega, // NOTE(review): the two bool flags presumably mean (write macroscopic variables, apply body force), judging by the CollisionTemplate cases in runOnIndices() — confirm in CumulantK17_Device.cuh
+                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                        para->getParD(level)->rho,
+                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                        para->getParD(level)->turbViscosity,
+                                                                                                        para->getSGSConstant(),
+                                                                                                        para->getParD(level)->numberOfNodes,
+                                                                                                        level,
+                                                                                                        para->getForcesDev(),
+                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                        para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default], // same argument position that runOnIndices() fills with 'indices'
+                                                                                                        para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]); // same argument position that runOnIndices() fills with 'size_indices'
+
+    getLastCudaError("LB_Kernel_CumulantK17 execution failed"); // presumably reports a pending CUDA error using this message — the helper is defined elsewhere
+}
+
+// Launches LB_Kernel_CumulantK17 on the nodes listed in `indices`, using the CUDA stream selected by `streamIndex`.
+//   indices, size_indices : index array and its length; both are forwarded unchanged as the last two kernel arguments
+//   collisionTemplate     : picks the two bool template flags of the kernel (see the case labels below)
+//   streamIndex           : resolved to a cudaStream_t through para->getStreamManager()->getStream()
+// Throws std::runtime_error if collisionTemplate matches none of the cases below.
+template<TurbulenceModel turbulenceModel>
+void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex )
+{
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
+
+    switch (collisionTemplate) // NOTE(review): flag order is presumably (write macroscopic variables, apply body force), inferred from which case sets which flag — confirm in CumulantK17_Device.cuh
+    {
+        case CollisionTemplate::Default: // flags: false, false
+            LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity, para->getSGSConstant(),
+                                                                                                                        para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
+
+        case CollisionTemplate::WriteMacroVars: // flags: true, false
+            LB_Kernel_CumulantK17 < turbulenceModel, true, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity, para->getSGSConstant(),
+                                                                                                                        para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
+
+        case CollisionTemplate::SubDomainBorder: // falls through: same kernel variant as AllFeatures
+        case CollisionTemplate::AllFeatures: // flags: true, true
+            LB_Kernel_CumulantK17 < turbulenceModel, true, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(  para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity, para->getSGSConstant(),
+                                                                                                                        para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
+
+        case CollisionTemplate::ApplyBodyForce: // flags: false, true
+            LB_Kernel_CumulantK17 < turbulenceModel, false, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity, para->getSGSConstant(),
+                                                                                                                        para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
+
+        default: // no break needed: the throw leaves the function
+            throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()");
+    }
+
+    getLastCudaError("LB_Kernel_CumulantK17 execution failed");
+}
+
+template<TurbulenceModel turbulenceModel> // constructor: stores para/level, then fills in preprocessor type, kernel group and launch grid
+CumulantK17<turbulenceModel>::CumulantK17(std::shared_ptr<Parameter> para, int level)
+{
+    this->level = level; // 'this->' is needed on these two lines: the parameters shadow the inherited members of the same name
+    this->para = para;
+
+    myKernelGroup = BasicKernel;
+
+    myPreProcessorTypes.push_back(InitCompSP27);
+
+    kernelUsesFluidNodeIndices = true;
+    cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // built from this level's thread count and node count
+
+    VF_LOG_INFO("Using turbulence model: {}", turbulenceModel);
+}
+
+template class CumulantK17<TurbulenceModel::AMD>; // explicit instantiations: the member definitions live in this .cu file, so each TurbulenceModel value that is to be usable needs an entry in this list
+template class CumulantK17<TurbulenceModel::Smagorinsky>;
+template class CumulantK17<TurbulenceModel::QR>;
+template class CumulantK17<TurbulenceModel::None>;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h
new file mode 100644
index 0000000000000000000000000000000000000000..00c79a30c9ccf9a89901165d020fc85d5a479c1d
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h
@@ -0,0 +1,20 @@
+#ifndef CUMULANT_K17_H
+#define CUMULANT_K17_H
+
+#include "Kernel/KernelImp.h"
+#include "Parameter/Parameter.h"
+
+template<TurbulenceModel turbulenceModel> // kernel class selected at compile time by the turbulence model
+class CumulantK17 : public KernelImp
+{
+public:
+    static std::shared_ptr<CumulantK17> getNewInstance(std::shared_ptr<Parameter> para, int level); // public factory; the constructors below are private
+    void run() override;
+    void runOnIndices(const unsigned int* indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex) override;
+
+private:
+    CumulantK17(); // declared only; CumulantK17.cu provides no definition for it
+    CumulantK17(std::shared_ptr<Parameter> para, int level);
+};
+
+#endif 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
deleted file mode 100644
index b176b94d07e7f280d738a797d5bd853095e3caed..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "CumulantK17Comp.h"
-
-#include "Parameter/Parameter.h"
-#include "CumulantK17Comp_Device.cuh"
-#include "cuda/CudaGrid.h"
-
-std::shared_ptr<CumulantK17Comp> CumulantK17Comp::getNewInstance(std::shared_ptr<Parameter> para, int level)
-{
-	return std::shared_ptr<CumulantK17Comp>(new CumulantK17Comp(para,level));
-}
-
-void CumulantK17Comp::run()
-{
-	LB_Kernel_CumulantK17Comp <<< cudaGrid.grid, cudaGrid.threads >>>(para->getParD(level)->omega,
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->numberOfNodes,
-													level,
-													para->getForcesDev(),
-                                                    para->getQuadricLimitersDev(),
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK17Comp execution failed");
-}
-
-CumulantK17Comp::CumulantK17Comp(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
-{
-	myPreProcessorTypes.push_back(InitCompSP27);
-	myKernelGroup = BasicKernel;
-	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
-}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h
deleted file mode 100644
index 22a95a688e5d078d7b710f494bfea360c9af0d6b..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef CUMULANT_K17_COMP_H
-#define CUMULANT_K17_COMP_H
-
-#include "Kernel/KernelImp.h"
-
-class CumulantK17Comp : public KernelImp
-{
-public:
-	static std::shared_ptr<CumulantK17Comp> getNewInstance(std::shared_ptr< Parameter> para, int level);
-	void run();
-
-private:
-	CumulantK17Comp();
-	CumulantK17Comp(std::shared_ptr< Parameter> para, int level);
-};
-
-#endif 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
deleted file mode 100644
index 7cf27aa883cbfd3a0e4a0a36fa61649a62d06eeb..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
+++ /dev/null
@@ -1,1040 +0,0 @@
-#include "LBM/LB.h" 
-#include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-
-using namespace vf::lbm::constant;
-using namespace vf::lbm::dir;
-#include "math.h"
-
-
-__global__ void LB_Kernel_CumulantK17Comp(real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	real* quadricLimiters,
-	bool EvenOrOdd)
-{
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k<size_Mat)
-	{
-		////////////////////////////////////////////////////////////////////////////////
-		unsigned int BC;
-		BC = bcMatD[k];
-
-		if ((BC != GEO_SOLID) && (BC != GEO_VOID))
-		{
-			Distributions27 D;
-			if (EvenOrOdd == true)
-			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
-			}
-			else
-			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
-			}
-
-			////////////////////////////////////////////////////////////////////////////////
-			//index
-			//unsigned int kzero= k;
-			//unsigned int ke   = k;
-			unsigned int kw = neighborX[k];
-			//unsigned int kn   = k;
-			unsigned int ks = neighborY[k];
-			//unsigned int kt   = k;
-			unsigned int kb = neighborZ[k];
-			unsigned int ksw = neighborY[kw];
-			//unsigned int kne  = k;
-			//unsigned int kse  = ks;
-			//unsigned int knw  = kw;
-			unsigned int kbw = neighborZ[kw];
-			//unsigned int kte  = k;
-			//unsigned int kbe  = kb;
-			//unsigned int ktw  = kw;
-			unsigned int kbs = neighborZ[ks];
-			//unsigned int ktn  = k;
-			//unsigned int kbn  = kb;
-			//unsigned int kts  = ks;
-			//unsigned int ktse = ks;
-			//unsigned int kbnw = kbw;
-			//unsigned int ktnw = kw;
-			//unsigned int kbse = kbs;
-			//unsigned int ktsw = ksw;
-			//unsigned int kbne = kb;
-			//unsigned int ktne = k;
-			unsigned int kbsw = neighborZ[ksw];
-
-			//unsigned int kzero= k;
-			//unsigned int ke   = k;
-			//unsigned int kw   = neighborX[k];
-			//unsigned int kn   = k;
-			//unsigned int ks   = neighborY[k];
-			//unsigned int kt   = k;
-			//unsigned int kb   = neighborZ[k];
-			//unsigned int ksw  = neighborY[kw];
-			//unsigned int kne  = k;
-			//unsigned int kse  = ks;
-			//unsigned int knw  = kw;
-			//unsigned int kbw  = neighborZ[kw];
-			//unsigned int kte  = k;
-			//unsigned int kbe  = kb;
-			//unsigned int ktw  = kw;
-			//unsigned int kbs  = neighborZ[ks];
-			//unsigned int ktn  = k;
-			//unsigned int kbn  = kb;
-			//unsigned int kts  = ks;
-			//unsigned int ktse = ks;
-			//unsigned int kbnw = kbw;
-			//unsigned int ktnw = kw;
-			//unsigned int kbse = kbs;
-			//unsigned int ktsw = ksw;
-			//unsigned int kbne = kb;
-			//unsigned int ktne = k;
-			//unsigned int kbsw = neighborZ[ksw];
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
-			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
-											////////////////////////////////////////////////////////////////////////////////////
-			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-
-			real rho = c1o1 + drho;
-			////////////////////////////////////////////////////////////////////////////////////
-			//slow
-			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
-			//					   (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
-			//						((mfabb+mfcbb) + (mfbab+mfbcb)  +  (mfbba+mfbbc)));//fehlt mfbbb
-			real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-				(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-				(mfcbb - mfabb)) / rho;
-			real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-				(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-				(mfbcb - mfbab)) / rho;
-			real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-				(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-				(mfbbc - mfbba)) / rho;
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
-			real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero;
-			real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero;
-			vvx += fx*c1o2;
-			vvy += fy*c1o2;
-			vvz += fz*c1o2;
-			////////////////////////////////////////////////////////////////////////////////////
-			//real omega = omega_in;
-			////////////////////////////////////////////////////////////////////////////////////
-			//fast
-			real oMdrho = c1o1; // comp special
-							   //real oMdrho = one - (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
-							   //					   mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
-							   //					   mfabb+mfcbb + mfbab+mfbcb + mfbba+mfbbc + mfbbb);//fehlt mfbbb nicht mehr
-							   //real vvx    =mfccc-mfaaa + mfcac-mfaca + mfcaa-mfacc + mfcca-mfaac + 
-							   //				mfcba-mfabc + mfcbc-mfaba + mfcab-mfacb + mfccb-mfaab +
-							   //				mfcbb-mfabb;
-							   //real vvy    =mfccc-mfaaa + mfaca-mfcac + mfacc-mfcaa + mfcca-mfaac + 
-							   //				mfbca-mfbac + mfbcc-mfbaa + mfacb-mfcab + mfccb-mfaab +
-							   //				mfbcb-mfbab;
-							   //real vvz    =mfccc-mfaaa + mfcac-mfaca + mfacc-mfcaa + mfaac-mfcca + 
-							   //				mfbac-mfbca + mfbcc-mfbaa + mfabc-mfcba + mfcbc-mfaba +
-							   //				mfbbc-mfbba;
-							   ////////////////////////////////////////////////////////////////////////////////////
-							   // oMdrho assembler style -------> faaaaaastaaaa
-							   // or much sloooowaaaa ... it depï¿½ndssssss on sadaku
-			real m0, m1, m2;
-			//real oMdrho;
-			//{
-			//	oMdrho=mfccc+mfaaa;
-			//	m0=mfaca+mfcac;
-			//	m1=mfacc+mfcaa;
-			//	m2=mfaac+mfcca;
-			//	oMdrho+=m0;
-			//	m1+=m2;
-			//	oMdrho+=m1;
-			//	m0=mfbac+mfbca;
-			//	m1=mfbaa+mfbcc;
-			//	m0+=m1;
-			//	m1=mfabc+mfcba;
-			//	m2=mfaba+mfcbc;
-			//	m1+=m2;
-			//	m0+=m1;
-			//	m1=mfacb+mfcab;
-			//	m2=mfaab+mfccb;
-			//	m1+=m2;
-			//	m0+=m1;
-			//	oMdrho+=m0;
-			//	m0=mfabb+mfcbb;
-			//	m1=mfbab+mfbcb;
-			//	m2=mfbba+mfbbc;
-			//	m0+=m1+m2;
-			//	m0+=mfbbb; //hat gefehlt
-			//	oMdrho = one - (oMdrho + m0);
-			//}
-			//real vvx;
-			real vx2;
-			//{
-			//	vvx = mfccc-mfaaa;
-			//	m0  = mfcac-mfaca;
-			//	m1  = mfcaa-mfacc;
-			//	m2  = mfcca-mfaac;
-			//	vvx+= m0;
-			//	m1 += m2;
-			//	vvx+= m1;
-			//	vx2 = mfcba-mfabc;
-			//	m0  = mfcbc-mfaba;
-			//	m1  = mfcab-mfacb;
-			//	m2  = mfccb-mfaab;
-			//	vx2+= m0;
-			//	m1 += m2;
-			//	vx2+= m1;
-			//	vvx+= vx2;
-			//	vx2 = mfcbb-mfabb;
-			//	vvx+= vx2;
-			//}
-			//real vvy;
-			real vy2;
-			//{
-			//	vvy = mfccc-mfaaa;
-			//	m0  = mfaca-mfcac;
-			//	m1  = mfacc-mfcaa;
-			//	m2  = mfcca-mfaac;
-			//	vvy+= m0;
-			//	m1 += m2;
-			//	vvy+= m1;
-			//	vy2 = mfbca-mfbac;
-			//	m0  = mfbcc-mfbaa;
-			//	m1  = mfacb-mfcab;
-			//	m2  = mfccb-mfaab;
-			//	vy2+= m0;
-			//	m1 += m2;
-			//	vy2+= m1;
-			//	vvy+= vy2;
-			//	vy2 = mfbcb-mfbab;
-			//	vvy+= vy2;
-			//}
-			//real vvz;
-			real vz2;
-			//{
-			//	vvz = mfccc-mfaaa;
-			//	m0  = mfcac-mfaca;
-			//	m1  = mfacc-mfcaa;
-			//	m2  = mfaac-mfcca;
-			//	vvz+= m0;
-			//	m1 += m2;
-			//	vvz+= m1;
-			//	vz2 = mfbac-mfbca;
-			//	m0  = mfbcc-mfbaa;
-			//	m1  = mfabc-mfcba;
-			//	m2  = mfcbc-mfaba;
-			//	vz2+= m0;
-			//	m1 += m2;
-			//	vz2+= m1;
-			//	vvz+= vz2;
-			//	vz2 = mfbbc-mfbba;
-			//	vvz+= vz2;
-			//}
-			vx2 = vvx*vvx;
-			vy2 = vvy*vvy;
-			vz2 = vvz*vvz;
-			////////////////////////////////////////////////////////////////////////////////////
-			real wadjust;
-			real qudricLimitP = quadricLimiters[0];  //0.01f; //  * 0.0001f; // 1000000.0f; // 1000000.0f; //
-			real qudricLimitM = quadricLimiters[1];  //0.01f; //  * 0.0001f; // 1000000.0f; // 1000000.0f; //
-			real qudricLimitD = quadricLimiters[2];  //0.01f; //  * 0.001f;  // 1000000.0f; // 1000000.0f; //
-									  ////////////////////////////////////////////////////////////////////////////////////
-									  //Hin
-									  ////////////////////////////////////////////////////////////////////////////////////
-									  // mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
-									  ////////////////////////////////////////////////////////////////////////////////////
-									  // Z - Dir
-			m2 = mfaaa + mfaac;
-			m1 = mfaac - mfaaa;
-			m0 = m2 + mfaab;
-			mfaaa = m0;
-			m0 += c1o36 * oMdrho;
-			mfaab = m1 - m0 * vvz;
-			mfaac = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaba + mfabc;
-			m1 = mfabc - mfaba;
-			m0 = m2 + mfabb;
-			mfaba = m0;
-			m0 += c1o9 * oMdrho;
-			mfabb = m1 - m0 * vvz;
-			mfabc = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaca + mfacc;
-			m1 = mfacc - mfaca;
-			m0 = m2 + mfacb;
-			mfaca = m0;
-			m0 += c1o36 * oMdrho;
-			mfacb = m1 - m0 * vvz;
-			mfacc = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfbaa + mfbac;
-			m1 = mfbac - mfbaa;
-			m0 = m2 + mfbab;
-			mfbaa = m0;
-			m0 += c1o9 * oMdrho;
-			mfbab = m1 - m0 * vvz;
-			mfbac = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfbba + mfbbc;
-			m1 = mfbbc - mfbba;
-			m0 = m2 + mfbbb;
-			mfbba = m0;
-			m0 += c4o9 * oMdrho;
-			mfbbb = m1 - m0 * vvz;
-			mfbbc = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfbca + mfbcc;
-			m1 = mfbcc - mfbca;
-			m0 = m2 + mfbcb;
-			mfbca = m0;
-			m0 += c1o9 * oMdrho;
-			mfbcb = m1 - m0 * vvz;
-			mfbcc = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfcaa + mfcac;
-			m1 = mfcac - mfcaa;
-			m0 = m2 + mfcab;
-			mfcaa = m0;
-			m0 += c1o36 * oMdrho;
-			mfcab = m1 - m0 * vvz;
-			mfcac = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfcba + mfcbc;
-			m1 = mfcbc - mfcba;
-			m0 = m2 + mfcbb;
-			mfcba = m0;
-			m0 += c1o9 * oMdrho;
-			mfcbb = m1 - m0 * vvz;
-			mfcbc = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfcca + mfccc;
-			m1 = mfccc - mfcca;
-			m0 = m2 + mfccb;
-			mfcca = m0;
-			m0 += c1o36 * oMdrho;
-			mfccb = m1 - m0 * vvz;
-			mfccc = m2 - c2o1*	m1 * vvz + vz2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			// mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// Y - Dir
-			m2 = mfaaa + mfaca;
-			m1 = mfaca - mfaaa;
-			m0 = m2 + mfaba;
-			mfaaa = m0;
-			m0 += c1o6 * oMdrho;
-			mfaba = m1 - m0 * vvy;
-			mfaca = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaab + mfacb;
-			m1 = mfacb - mfaab;
-			m0 = m2 + mfabb;
-			mfaab = m0;
-			mfabb = m1 - m0 * vvy;
-			mfacb = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaac + mfacc;
-			m1 = mfacc - mfaac;
-			m0 = m2 + mfabc;
-			mfaac = m0;
-			m0 += c1o18 * oMdrho;
-			mfabc = m1 - m0 * vvy;
-			mfacc = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfbaa + mfbca;
-			m1 = mfbca - mfbaa;
-			m0 = m2 + mfbba;
-			mfbaa = m0;
-			m0 += c2o3 * oMdrho;
-			mfbba = m1 - m0 * vvy;
-			mfbca = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfbab + mfbcb;
-			m1 = mfbcb - mfbab;
-			m0 = m2 + mfbbb;
-			mfbab = m0;
-			mfbbb = m1 - m0 * vvy;
-			mfbcb = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfbac + mfbcc;
-			m1 = mfbcc - mfbac;
-			m0 = m2 + mfbbc;
-			mfbac = m0;
-			m0 += c2o9 * oMdrho;
-			mfbbc = m1 - m0 * vvy;
-			mfbcc = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfcaa + mfcca;
-			m1 = mfcca - mfcaa;
-			m0 = m2 + mfcba;
-			mfcaa = m0;
-			m0 += c1o6 * oMdrho;
-			mfcba = m1 - m0 * vvy;
-			mfcca = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfcab + mfccb;
-			m1 = mfccb - mfcab;
-			m0 = m2 + mfcbb;
-			mfcab = m0;
-			mfcbb = m1 - m0 * vvy;
-			mfccb = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfcac + mfccc;
-			m1 = mfccc - mfcac;
-			m0 = m2 + mfcbc;
-			mfcac = m0;
-			m0 += c1o18 * oMdrho;
-			mfcbc = m1 - m0 * vvy;
-			mfccc = m2 - c2o1*	m1 * vvy + vy2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			// mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// X - Dir
-			m2 = mfaaa + mfcaa;
-			m1 = mfcaa - mfaaa;
-			m0 = m2 + mfbaa;
-			mfaaa = m0;
-			m0 += c1o1* oMdrho;
-			mfbaa = m1 - m0 * vvx;
-			mfcaa = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaba + mfcba;
-			m1 = mfcba - mfaba;
-			m0 = m2 + mfbba;
-			mfaba = m0;
-			mfbba = m1 - m0 * vvx;
-			mfcba = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaca + mfcca;
-			m1 = mfcca - mfaca;
-			m0 = m2 + mfbca;
-			mfaca = m0;
-			m0 += c1o3 * oMdrho;
-			mfbca = m1 - m0 * vvx;
-			mfcca = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaab + mfcab;
-			m1 = mfcab - mfaab;
-			m0 = m2 + mfbab;
-			mfaab = m0;
-			mfbab = m1 - m0 * vvx;
-			mfcab = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfabb + mfcbb;
-			m1 = mfcbb - mfabb;
-			m0 = m2 + mfbbb;
-			mfabb = m0;
-			mfbbb = m1 - m0 * vvx;
-			mfcbb = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfacb + mfccb;
-			m1 = mfccb - mfacb;
-			m0 = m2 + mfbcb;
-			mfacb = m0;
-			mfbcb = m1 - m0 * vvx;
-			mfccb = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfaac + mfcac;
-			m1 = mfcac - mfaac;
-			m0 = m2 + mfbac;
-			mfaac = m0;
-			m0 += c1o3 * oMdrho;
-			mfbac = m1 - m0 * vvx;
-			mfcac = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfabc + mfcbc;
-			m1 = mfcbc - mfabc;
-			m0 = m2 + mfbbc;
-			mfabc = m0;
-			mfbbc = m1 - m0 * vvx;
-			mfcbc = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			m2 = mfacc + mfccc;
-			m1 = mfccc - mfacc;
-			m0 = m2 + mfbcc;
-			mfacc = m0;
-			m0 += c1o9 * oMdrho;
-			mfbcc = m1 - m0 * vvx;
-			mfccc = m2 - c2o1*	m1 * vvx + vx2 * m0;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-
-			////////////////////////////////////////////////////////////////////////////////////
-			// Cumulants
-			////////////////////////////////////////////////////////////////////////////////////
-			real OxxPyyPzz = c1o1;	//set the bulk viscosity one is high / two is very low and zero is (too) high ... (also called omega 2)
-
-									////////////////////////////////////////////////////////////
-									//3.
-									//////////////////////////////
-			real OxyyPxzz = c8o1*(-c2o1 + omega)*(c1o1 + c2o1*omega) / (-c8o1 - c14o1*omega + c7o1*omega*omega);//one;
-			real OxyyMxzz = c8o1*(-c2o1 + omega)*(-c7o1 + c4o1*omega) / (c56o1 - c50o1*omega + c9o1*omega*omega);//one;
-			real Oxyz = c24o1*(-c2o1 + omega)*(-c2o1 - c7o1*omega + c3o1*omega*omega) / (c48o1 + c152o1*omega - c130o1*omega*omega + c29o1*omega*omega*omega);//one;
-																																										  ////////////////////////////////////////////////////////////
-																																										  //4.
-																																										  //////////////////////////////
-			real O4 = c1o1;
-			//////////////////////////////
-			//real O4        = omega;//TRT
-			////////////////////////////////////////////////////////////
-			//5.
-			//////////////////////////////
-			real O5 = c1o1;
-			////////////////////////////////////////////////////////////
-			//6.
-			//////////////////////////////
-			real O6 = c1o1;
-			////////////////////////////////////////////////////////////
-
-
-			//central moments to cumulants
-			//4.
-			real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
-			real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
-			real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
-
-			real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
-			real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
-			real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
-
-			//5.
-			real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-			real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-			real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
-
-			//6.
-
-			real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-					+ c2o1 * (mfcaa * mfaca * mfaac)
-					+ c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
-				- c1o3 * (mfacc + mfcac + mfcca) / rho
-				- c1o9 * (mfcaa + mfaca + mfaac) / rho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-					+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
-				+ c1o27*((drho * drho - drho) / (rho*rho)));
-
-			//2.
-			// linear combinations
-			real mxxPyyPzz = mfcaa + mfaca + mfaac;
-			real mxxMyy = mfcaa - mfaca;
-			real mxxMzz = mfcaa - mfaac;
-
-			////////////////////////////////////////////////////////////////////////////
-			real Dxy = -c3o1*omega*mfbba;
-			real Dxz = -c3o1*omega*mfbab;
-			real Dyz = -c3o1*omega*mfabb;
-
-			//3.
-			// linear combinations
-
-			real mxxyPyzz = mfcba + mfabc;
-			real mxxyMyzz = mfcba - mfabc;
-
-			real mxxzPyyz = mfcab + mfacb;
-			real mxxzMyyz = mfcab - mfacb;
-
-			real mxyyPxzz = mfbca + mfbac;
-			real mxyyMxzz = mfbca - mfbac;
-
-			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			//incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
-
-			real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
-			real dyuy = dxux + omega * c3o2 * mxxMyy;
-			real dzuz = dxux + omega * c3o2 * mxxMzz;
-
-			//relax
-			mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-			mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-			mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-
-			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			////no correction
-			//mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz);//-magicBulk*OxxPyyPzz;
-			//mxxMyy    += -(-omega) * (-mxxMyy);
-			//mxxMzz    += -(-omega) * (-mxxMzz);
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			mfabb += omega * (-mfabb);
-			mfbab += omega * (-mfbab);
-			mfbba += omega * (-mfbba);
-			//////////////////////////////////////////////////////////////////////////
-
-			// linear combinations back
-			mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz);
-
-
-			//relax
-			//////////////////////////////////////////////////////////////////////////
-			//das ist der limiter
-			wadjust = Oxyz + (c1o1 - Oxyz)*abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
-			mfbbb += wadjust * (-mfbbb);
-			wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
-			mxxyPyzz += wadjust * (-mxxyPyzz);
-			wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
-			mxxyMyzz += wadjust * (-mxxyMyzz);
-			wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
-			mxxzPyyz += wadjust * (-mxxzPyyz);
-			wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
-			mxxzMyyz += wadjust * (-mxxzMyyz);
-			wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
-			mxyyPxzz += wadjust * (-mxyyPxzz);
-			wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
-			mxyyMxzz += wadjust * (-mxyyMxzz);
-			//////////////////////////////////////////////////////////////////////////
-			//ohne limiter
-			//mfbbb     += OxyyMxzz * (-mfbbb);
-			//mxxyPyzz  += OxyyPxzz * (-mxxyPyzz);
-			//mxxyMyzz  += OxyyMxzz * (-mxxyMyzz);
-			//mxxzPyyz  += OxyyPxzz * (-mxxzPyyz);
-			//mxxzMyyz  += OxyyMxzz * (-mxxzMyyz);
-			//mxyyPxzz  += OxyyPxzz * (-mxyyPxzz);
-			//mxyyMxzz  += OxyyMxzz * (-mxyyMxzz);
-			//////////////////////////////////////////////////////////////////////////
-
-			// linear combinations back
-			mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
-			mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
-			mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
-			mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
-			mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
-			mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
-
-			//4.
-			//////////////////////////////////////////////////////////////////////////
-			//mit limiter
-			//	wadjust    = O4+(one-O4)*abs(CUMacc)/(abs(CUMacc)+qudricLimit);
-			//CUMacc    += wadjust * (-CUMacc);
-			//	wadjust    = O4+(one-O4)*abs(CUMcac)/(abs(CUMcac)+qudricLimit);
-			//CUMcac    += wadjust * (-CUMcac); 
-			//	wadjust    = O4+(one-O4)*abs(CUMcca)/(abs(CUMcca)+qudricLimit);
-			//CUMcca    += wadjust * (-CUMcca); 
-
-			//	wadjust    = O4+(one-O4)*abs(CUMbbc)/(abs(CUMbbc)+qudricLimit);
-			//CUMbbc    += wadjust * (-CUMbbc); 
-			//	wadjust    = O4+(one-O4)*abs(CUMbcb)/(abs(CUMbcb)+qudricLimit);
-			//CUMbcb    += wadjust * (-CUMbcb); 
-			//	wadjust    = O4+(one-O4)*abs(CUMcbb)/(abs(CUMcbb)+qudricLimit);
-			//CUMcbb    += wadjust * (-CUMcbb); 
-			//////////////////////////////////////////////////////////////////////////
-			real factorA = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega);
-			real factorB = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega);
-			//////////////////////////////////////////////////////////////////////////
-			//ohne limiter
-			//CUMacc += O4 * (-CUMacc); 
-			//CUMcac += O4 * (-CUMcac); 
-			//CUMcca += O4 * (-CUMcca); 
-			//CUMbbc += O4 * (-CUMbbc); 
-			//CUMbcb += O4 * (-CUMbcb); 
-			//CUMcbb += O4 * (-CUMcbb); 
-			CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMacc);
-			CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMcac);
-			CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (CUMcca);
-			CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy           * c1o3 * factorB + (c1o1 - O4) * (CUMbbc);
-			CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz           * c1o3 * factorB + (c1o1 - O4) * (CUMbcb);
-			CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz           * c1o3 * factorB + (c1o1 - O4) * (CUMcbb);
-			//////////////////////////////////////////////////////////////////////////
-
-
-			//5.
-			CUMbcc += O5 * (-CUMbcc);
-			CUMcbc += O5 * (-CUMcbc);
-			CUMccb += O5 * (-CUMccb);
-
-			//6.
-			CUMccc += O6 * (-CUMccc);
-
-
-
-			//back cumulants to central moments
-			//4.
-			mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
-			mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
-			mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
-
-			mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
-			mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
-			mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
-
-			//5.
-			mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-			mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-			mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
-
-			//6.
-
-			mfccc = CUMccc - ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-					+ c2o1 * (mfcaa * mfaca * mfaac)
-					+ c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
-				- c1o3 * (mfacc + mfcac + mfcca) / rho
-				- c1o9 * (mfcaa + mfaca + mfaac) / rho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-					+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
-				+ c1o27*((drho * drho - drho) / (rho*rho)));
-			////////////////////////////////////////////////////////////////////////////////////
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			mfbaa = -mfbaa;
-			mfaba = -mfaba;
-			mfaab = -mfaab;
-			////////////////////////////////////////////////////////////////////////////////////
-
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//back
-			////////////////////////////////////////////////////////////////////////////////////
-			//mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// Z - Dir
-			m0 = mfaac * c1o2 + mfaab * (vvz - c1o2) + (mfaaa + c1o1* oMdrho) * (vz2 - vvz) * c1o2;
-			m1 = -mfaac - c2o1* mfaab *  vvz + mfaaa                * (c1o1 - vz2) - c1o1* oMdrho * vz2;
-			m2 = mfaac * c1o2 + mfaab * (vvz + c1o2) + (mfaaa + c1o1* oMdrho) * (vz2 + vvz) * c1o2;
-			mfaaa = m0;
-			mfaab = m1;
-			mfaac = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfabc * c1o2 + mfabb * (vvz - c1o2) + mfaba * (vz2 - vvz) * c1o2;
-			m1 = -mfabc - c2o1* mfabb *  vvz + mfaba * (c1o1 - vz2);
-			m2 = mfabc * c1o2 + mfabb * (vvz + c1o2) + mfaba * (vz2 + vvz) * c1o2;
-			mfaba = m0;
-			mfabb = m1;
-			mfabc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-			m1 = -mfacc - c2o1* mfacb *  vvz + mfaca                  * (c1o1 - vz2) - c1o3 * oMdrho * vz2;
-			m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
-			mfaca = m0;
-			mfacb = m1;
-			mfacc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfbac * c1o2 + mfbab * (vvz - c1o2) + mfbaa * (vz2 - vvz) * c1o2;
-			m1 = -mfbac - c2o1* mfbab *  vvz + mfbaa * (c1o1 - vz2);
-			m2 = mfbac * c1o2 + mfbab * (vvz + c1o2) + mfbaa * (vz2 + vvz) * c1o2;
-			mfbaa = m0;
-			mfbab = m1;
-			mfbac = m2;
-			/////////b//////////////////////////////////////////////////////////////////////////
-			m0 = mfbbc * c1o2 + mfbbb * (vvz - c1o2) + mfbba * (vz2 - vvz) * c1o2;
-			m1 = -mfbbc - c2o1* mfbbb *  vvz + mfbba * (c1o1 - vz2);
-			m2 = mfbbc * c1o2 + mfbbb * (vvz + c1o2) + mfbba * (vz2 + vvz) * c1o2;
-			mfbba = m0;
-			mfbbb = m1;
-			mfbbc = m2;
-			/////////b//////////////////////////////////////////////////////////////////////////
-			m0 = mfbcc * c1o2 + mfbcb * (vvz - c1o2) + mfbca * (vz2 - vvz) * c1o2;
-			m1 = -mfbcc - c2o1* mfbcb *  vvz + mfbca * (c1o1 - vz2);
-			m2 = mfbcc * c1o2 + mfbcb * (vvz + c1o2) + mfbca * (vz2 + vvz) * c1o2;
-			mfbca = m0;
-			mfbcb = m1;
-			mfbcc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-			m1 = -mfcac - c2o1* mfcab *  vvz + mfcaa                  * (c1o1 - vz2) - c1o3 * oMdrho * vz2;
-			m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
-			mfcaa = m0;
-			mfcab = m1;
-			mfcac = m2;
-			/////////c//////////////////////////////////////////////////////////////////////////
-			m0 = mfcbc * c1o2 + mfcbb * (vvz - c1o2) + mfcba * (vz2 - vvz) * c1o2;
-			m1 = -mfcbc - c2o1* mfcbb *  vvz + mfcba * (c1o1 - vz2);
-			m2 = mfcbc * c1o2 + mfcbb * (vvz + c1o2) + mfcba * (vz2 + vvz) * c1o2;
-			mfcba = m0;
-			mfcbb = m1;
-			mfcbc = m2;
-			/////////c//////////////////////////////////////////////////////////////////////////
-			m0 = mfccc * c1o2 + mfccb * (vvz - c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 - vvz) * c1o2;
-			m1 = -mfccc - c2o1* mfccb *  vvz + mfcca                  * (c1o1 - vz2) - c1o9 * oMdrho * vz2;
-			m2 = mfccc * c1o2 + mfccb * (vvz + c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 + vvz) * c1o2;
-			mfcca = m0;
-			mfccb = m1;
-			mfccc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			//mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// Y - Dir
-			m0 = mfaca * c1o2 + mfaba * (vvy - c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 - vvy) * c1o2;
-			m1 = -mfaca - c2o1* mfaba *  vvy + mfaaa                  * (c1o1 - vy2) - c1o6 * oMdrho * vy2;
-			m2 = mfaca * c1o2 + mfaba * (vvy + c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 + vvy) * c1o2;
-			mfaaa = m0;
-			mfaba = m1;
-			mfaca = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfacb * c1o2 + mfabb * (vvy - c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 - vvy) * c1o2;
-			m1 = -mfacb - c2o1* mfabb *  vvy + mfaab                  * (c1o1 - vy2) - c2o3 * oMdrho * vy2;
-			m2 = mfacb * c1o2 + mfabb * (vvy + c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 + vvy) * c1o2;
-			mfaab = m0;
-			mfabb = m1;
-			mfacb = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfacc * c1o2 + mfabc * (vvy - c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 - vvy) * c1o2;
-			m1 = -mfacc - c2o1* mfabc *  vvy + mfaac                  * (c1o1 - vy2) - c1o6 * oMdrho * vy2;
-			m2 = mfacc * c1o2 + mfabc * (vvy + c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 + vvy) * c1o2;
-			mfaac = m0;
-			mfabc = m1;
-			mfacc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfbca * c1o2 + mfbba * (vvy - c1o2) + mfbaa * (vy2 - vvy) * c1o2;
-			m1 = -mfbca - c2o1* mfbba *  vvy + mfbaa * (c1o1 - vy2);
-			m2 = mfbca * c1o2 + mfbba * (vvy + c1o2) + mfbaa * (vy2 + vvy) * c1o2;
-			mfbaa = m0;
-			mfbba = m1;
-			mfbca = m2;
-			/////////b//////////////////////////////////////////////////////////////////////////
-			m0 = mfbcb * c1o2 + mfbbb * (vvy - c1o2) + mfbab * (vy2 - vvy) * c1o2;
-			m1 = -mfbcb - c2o1* mfbbb *  vvy + mfbab * (c1o1 - vy2);
-			m2 = mfbcb * c1o2 + mfbbb * (vvy + c1o2) + mfbab * (vy2 + vvy) * c1o2;
-			mfbab = m0;
-			mfbbb = m1;
-			mfbcb = m2;
-			/////////b//////////////////////////////////////////////////////////////////////////
-			m0 = mfbcc * c1o2 + mfbbc * (vvy - c1o2) + mfbac * (vy2 - vvy) * c1o2;
-			m1 = -mfbcc - c2o1* mfbbc *  vvy + mfbac * (c1o1 - vy2);
-			m2 = mfbcc * c1o2 + mfbbc * (vvy + c1o2) + mfbac * (vy2 + vvy) * c1o2;
-			mfbac = m0;
-			mfbbc = m1;
-			mfbcc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfcca * c1o2 + mfcba * (vvy - c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 - vvy) * c1o2;
-			m1 = -mfcca - c2o1* mfcba *  vvy + mfcaa                   * (c1o1 - vy2) - c1o18 * oMdrho * vy2;
-			m2 = mfcca * c1o2 + mfcba * (vvy + c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 + vvy) * c1o2;
-			mfcaa = m0;
-			mfcba = m1;
-			mfcca = m2;
-			/////////c//////////////////////////////////////////////////////////////////////////
-			m0 = mfccb * c1o2 + mfcbb * (vvy - c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 - vvy) * c1o2;
-			m1 = -mfccb - c2o1* mfcbb *  vvy + mfcab                  * (c1o1 - vy2) - c2o9 * oMdrho * vy2;
-			m2 = mfccb * c1o2 + mfcbb * (vvy + c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 + vvy) * c1o2;
-			mfcab = m0;
-			mfcbb = m1;
-			mfccb = m2;
-			/////////c//////////////////////////////////////////////////////////////////////////
-			m0 = mfccc * c1o2 + mfcbc * (vvy - c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 - vvy) * c1o2;
-			m1 = -mfccc - c2o1* mfcbc *  vvy + mfcac                   * (c1o1 - vy2) - c1o18 * oMdrho * vy2;
-			m2 = mfccc * c1o2 + mfcbc * (vvy + c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 + vvy) * c1o2;
-			mfcac = m0;
-			mfcbc = m1;
-			mfccc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			//mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// X - Dir
-			m0 = mfcaa * c1o2 + mfbaa * (vvx - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfcaa - c2o1* mfbaa *  vvx + mfaaa                   * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
-			m2 = mfcaa * c1o2 + mfbaa * (vvx + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfaaa = m0;
-			mfbaa = m1;
-			mfcaa = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfcba * c1o2 + mfbba * (vvx - c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfcba - c2o1* mfbba *  vvx + mfaba                  * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
-			m2 = mfcba * c1o2 + mfbba * (vvx + c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfaba = m0;
-			mfbba = m1;
-			mfcba = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfcca * c1o2 + mfbca * (vvx - c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfcca - c2o1* mfbca *  vvx + mfaca                   * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
-			m2 = mfcca * c1o2 + mfbca * (vvx + c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfaca = m0;
-			mfbca = m1;
-			mfcca = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfcab * c1o2 + mfbab * (vvx - c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfcab - c2o1* mfbab *  vvx + mfaab                  * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
-			m2 = mfcab * c1o2 + mfbab * (vvx + c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfaab = m0;
-			mfbab = m1;
-			mfcab = m2;
-			///////////b////////////////////////////////////////////////////////////////////////
-			m0 = mfcbb * c1o2 + mfbbb * (vvx - c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfcbb - c2o1* mfbbb *  vvx + mfabb                  * (c1o1 - vx2) - c4o9 * oMdrho * vx2;
-			m2 = mfcbb * c1o2 + mfbbb * (vvx + c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfabb = m0;
-			mfbbb = m1;
-			mfcbb = m2;
-			///////////b////////////////////////////////////////////////////////////////////////
-			m0 = mfccb * c1o2 + mfbcb * (vvx - c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfccb - c2o1* mfbcb *  vvx + mfacb                  * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
-			m2 = mfccb * c1o2 + mfbcb * (vvx + c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfacb = m0;
-			mfbcb = m1;
-			mfccb = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-			////////////////////////////////////////////////////////////////////////////////////
-			m0 = mfcac * c1o2 + mfbac * (vvx - c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfcac - c2o1* mfbac *  vvx + mfaac                   * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
-			m2 = mfcac * c1o2 + mfbac * (vvx + c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfaac = m0;
-			mfbac = m1;
-			mfcac = m2;
-			///////////c////////////////////////////////////////////////////////////////////////
-			m0 = mfcbc * c1o2 + mfbbc * (vvx - c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfcbc - c2o1* mfbbc *  vvx + mfabc                  * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
-			m2 = mfcbc * c1o2 + mfbbc * (vvx + c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfabc = m0;
-			mfbbc = m1;
-			mfcbc = m2;
-			///////////c////////////////////////////////////////////////////////////////////////
-			m0 = mfccc * c1o2 + mfbcc * (vvx - c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-			m1 = -mfccc - c2o1* mfbcc *  vvx + mfacc                   * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
-			m2 = mfccc * c1o2 + mfbcc * (vvx + c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
-			mfacc = m0;
-			mfbcc = m1;
-			mfccc = m2;
-			////////////////////////////////////////////////////////////////////////////////////
-
-			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
-			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
-			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
-			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
-			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
-			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
-			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
-			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
-			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
-			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
-			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
-			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
-			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
-			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
-			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
-			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
-			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
-			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
-			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
-			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
-			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
-			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
-			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
-			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
-			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
-			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
-			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
-										////////////////////////////////////////////////////////////////////////////////////
-		}
-	}
-}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
deleted file mode 100644
index f44842057d554498b0b5d4c733e2425e524a3b75..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef LB_Kernel_CUMULANT_K17_COMP_H
-#define LB_Kernel_CUMULANT_K17_COMP_H
-
-#include <DataTypes.h>
-#include <curand.h>
-
-__global__ void LB_Kernel_CumulantK17Comp(	real omega,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														int size_Mat,
-														int level,
-														real* forces,
-                                                        real* quadricLimiters,
-														bool EvenOrOdd);
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
similarity index 63%
rename from src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu
rename to src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
index db8caf1b23c2087a4c5c76886fb4530bc6272a1d..1ffec96c255b7923f3ee39c01f756abd8cad8862 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
@@ -1,55 +1,78 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file Cumulant27chimStream.cu
-//! \ingroup GPU
-//! \author Martin Schoenherr, Anna Wellmann
+//! \file CumulantK17_Device.cu
+//! \author Anna Wellmann, Martin Schönherr, Henry Korb, Henrik Asmuth
+//! \date 05/12/2022
+//! \brief Kernel for CumulantK17 including different turbulence models and options for local body forces and writing macroscopic variables
+//!
+//! CumulantK17 kernel using chimera transformations and quartic limiters as present in Geier et al. (2017). Additional options are three different
+//! eddy-viscosity turbulence models (Smagorinsky, AMD, QR) that can be set via the template parameter turbulenceModel (with default
+//! TurbulenceModel::None).
+//! The kernel is executed separately for each subset of fluid node indices with a different tag CollisionTemplate. For each subset, only the locally
+//! required options are switched on ( \p writeMacroscopicVariables and/or \p applyBodyForce) in order to minimize memory accesses. The default
+//! refers to the plain cumulant kernel (CollisionTemplate::Default).
+//! Nodes are added to subsets (taggedFluidNodes) in Simulation::init using a corresponding tag with different values of CollisionTemplate. These subsets
+//! are provided by the utilized PostCollisionInteractors depending on their specific requirements (e.g. writeMacroscopicVariables for probes).
+
 //=======================================================================================
-/* Device code */
-#include "LBM/LB.h" 
+#include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-#include "Kernel/Utilities/DistributionHelper.cuh"
+#include "lbm/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
+
+#include "GPU/TurbulentViscosityInlines.cuh"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
-#include "Kernel/Utilities/ChimeraTransformation.h"
+using namespace vf::gpu;
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LB_Kernel_CumulantK17CompChimRedesigned(
-    real omega,
+template<TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce>
+__global__ void LB_Kernel_CumulantK17(
+    real omega_in,
     uint* neighborX,
     uint* neighborY,
     uint* neighborZ,
     real* distributions,
-    unsigned long numberOfLBnodes,
+    real* rho,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* turbulentViscosity,
+    real SGSconstant,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
+    real* bodyForceX,
+    real* bodyForceY,
+    real* bodyForceZ,
     real* quadricLimiters,
     bool isEvenTimestep,
     const uint *fluidNodeIndices,
@@ -64,19 +87,18 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     //! The cumulant kernel is executed in the following steps
     //!
     ////////////////////////////////////////////////////////////////////////////////
-    //! - Get the thread index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
-    const unsigned kThread = vf::gpu::getNodeIndex();
+    const unsigned nodeIndex = getNodeIndex();
 
     //////////////////////////////////////////////////////////////////////////
-    //! - Return for non-fluid nodes
-    if (kThread >= numberOfFluidNodes) 
+    //! - Return for threads whose index is beyond the number of fluid nodes
+    if (nodeIndex >= numberOfFluidNodes)
         return;
-
     ////////////////////////////////////////////////////////////////////////////////
     //! - Get the node index from the array containing all indices of fluid nodes
     //!
-    const unsigned k_000 = fluidNodeIndices[kThread];
+    const unsigned k_000 = fluidNodeIndices[nodeIndex];
 
     //////////////////////////////////////////////////////////////////////////
     //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
@@ -84,11 +106,11 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep);
-    
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set neighbor indices (necessary for indirect addressing)
-    //!
     uint k_M00 = neighborX[k_000];
     uint k_0M0 = neighborY[k_000];
     uint k_00M = neighborZ[k_000];
@@ -96,9 +118,8 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     uint k_M0M = neighborZ[k_M00];
     uint k_0MM = neighborZ[k_0M0];
     uint k_MMM = neighborZ[k_MM0];
-
     ////////////////////////////////////////////////////////////////////////////////////
-    //! - Set local distributions (f's):
+    //! - Set local distributions
     //!
     real f_000 = (dist.f[DIR_000])[k_000];
     real f_P00 = (dist.f[DIR_P00])[k_000];
@@ -159,28 +180,28 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     real& m_200 = f_PMM;
     real& m_000 = f_MMM;
 
-    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
     //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
     //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
     //!
     real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
-                 (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
-                  ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
-                  ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+                (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+                ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+                ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
                     f_000;
 
     real oneOverRho = c1o1 / (c1o1 + drho);
 
     real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
                 (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
-               oneOverRho;
+            oneOverRho;
     real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
                 (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
-               oneOverRho;
+            oneOverRho;
     real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
                 (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
-               oneOverRho;
+            oneOverRho;
 
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
@@ -188,16 +209,55 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
     //!
     real factor = c1o1;
-    // The factor has to be scaled for each level to get the correct acceleration.
     for (size_t i = 1; i <= level; i++) {
         factor *= c2o1;
     }
-    real fx = forces[0] / factor;
-    real fy = forces[1] / factor;
-    real fz = forces[2] / factor;
-    vvx += fx * c1o2;
-    vvy += fy * c1o2;
-    vvz += fz * c1o2;
+
+    real fx = forces[0];
+    real fy = forces[1];
+    real fz = forces[2];
+
+    if( applyBodyForce ){
+        fx += bodyForceX[k_000];
+        fy += bodyForceY[k_000];
+        fz += bodyForceZ[k_000];
+
+        // real vx = vvx;
+        // real vy = vvy;
+        // real vz = vvz;
+        real acc_x = fx * c1o2 / factor;
+        real acc_y = fy * c1o2 / factor;
+        real acc_z = fz * c1o2 / factor;
+
+        vvx += acc_x;
+        vvy += acc_y;
+        vvz += acc_z;
+
+        // Reset body force. To be used when not using round-off correction.
+        bodyForceX[k_000] = 0.0f;
+        bodyForceY[k_000] = 0.0f;
+        bodyForceZ[k_000] = 0.0f;
+
+        ////////////////////////////////////////////////////////////////////////////////////
+        //!> Round-off correction
+        //!
+        //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
+        //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation.
+        //!> Seems to be necessary at very high Re boundary layers, where the forcing and velocity can
+        //!> differ by several orders of magnitude.
+        //!> \note 16/05/2022: Testing, still ongoing!
+        //!
+        // bodyForceX[k_000] = (acc_x-(vvx-vx))*factor*c2o1;
+        // bodyForceY[k_000] = (acc_y-(vvy-vy))*factor*c2o1;
+        // bodyForceZ[k_000] = (acc_z-(vvz-vz))*factor*c2o1;
+    }
+    else{
+        vvx += fx * c1o2 / factor;
+        vvy += fy * c1o2 / factor;
+        vvz += fz * c1o2 / factor;
+    }
+
+
     ////////////////////////////////////////////////////////////////////////////////////
     // calculate the square of velocities for this lattice node
     real vx2 = vvx * vvx;
@@ -272,15 +332,21 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     //!  - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$.
     //!  - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$.
     //!
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Calculate modified omega with turbulent viscosity
+    //!
+    real omega = omega_in;
+    if(turbulenceModel != TurbulenceModel::None){ omega /= (c1o1 + c3o1*omega_in*turbulentViscosity[k_000]); }
     ////////////////////////////////////////////////////////////
     // 2.
     real OxxPyyPzz = c1o1;
     ////////////////////////////////////////////////////////////
     // 3.
-    real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega)  / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
-    real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
-    real Oxyz     = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
-                    (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
+    real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
+    real OxyyMxzz =
+        c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
+    real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
+                (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
     ////////////////////////////////////////////////////////////
     // 4.
     real O4 = c1o1;
@@ -292,16 +358,16 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     real O6 = c1o1;
 
     ////////////////////////////////////////////////////////////////////////////////////
-    //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
+    //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
     //! different bulk viscosity).
     //!
-    real factorA = (c4o1 + c2o1  * omega - c3o1  * omega * omega) / (c2o1 - c7o1  * omega + c5o1  * omega * omega);
+    real factorA = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega);
     real factorB = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega);
 
     ////////////////////////////////////////////////////////////////////////////////////
-    //! - Compute cumulants (c's) from central moments according to Eq. (20)-(23) in
+    //! - Compute cumulants from central moments according to Eq. (20)-(23) in
     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
     //!
@@ -318,27 +384,27 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     // 5.
     real c_122 =
         m_122 - ((m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) +
-                 c1o3 * (m_120 + m_102)) *
-                 oneOverRho;
+                c1o3 * (m_120 + m_102)) *
+                oneOverRho;
     real c_212 =
         m_212 - ((m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) +
-                 c1o3 * (m_210 + m_012)) *
-                 oneOverRho;
+                c1o3 * (m_210 + m_012)) *
+                oneOverRho;
     real c_221 =
         m_221 - ((m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) +
-                 c1o3 * (m_021 + m_201)) *
-                 oneOverRho;
+                c1o3 * (m_021 + m_201)) *
+                oneOverRho;
     ////////////////////////////////////////////////////////////
     // 6.
     real c_222 = m_222 + ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
                             c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
                             c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
                             oneOverRho +
-                           (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
+                        (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
                             c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
                             oneOverRho * oneOverRho -
                             c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
-                           (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
+                        (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
                             (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
                             oneOverRho * oneOverRho * c2o3 +
                             c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
@@ -378,6 +444,22 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (m_000 - mxxPyyPzz);
     real dyuy = dxux + omega * c3o2 * mxxMyy;
     real dzuz = dxux + omega * c3o2 * mxxMzz;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    switch (turbulenceModel)
+    {
+    case TurbulenceModel::None:
+    case TurbulenceModel::AMD:  //AMD is computed in separate kernel
+        break;
+    case TurbulenceModel::Smagorinsky:
+        turbulentViscosity[k_000] = calcTurbulentViscositySmagorinsky(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz);
+        break;
+    case TurbulenceModel::QR:
+        turbulentViscosity[k_000] = calcTurbulentViscosityQR(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz);
+        break;
+    default:
+        break;
+    }
     ////////////////////////////////////////////////////////////
     //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
@@ -386,7 +468,6 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     mxxPyyPzz += OxxPyyPzz * (m_000 - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
     mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
     mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-    //////////////////////////////////////////////////////////////////////////
 
     ////////////////////////////////////////////////////////////////////////////////////
     ////no correction
@@ -394,18 +475,18 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     // mxxMyy += -(-omega) * (-mxxMyy);
     // mxxMzz += -(-omega) * (-mxxMzz);
     //////////////////////////////////////////////////////////////////////////
-    
     m_011 += omega * (-m_011);
     m_101 += omega * (-m_101);
     m_110 += omega * (-m_110);
 
+    ////////////////////////////////////////////////////////////////////////////////////
+    // relax
     //////////////////////////////////////////////////////////////////////////
+    // incl. limiter
     //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
     //!
-    //////////////////////////////////////////////////////////////////////////
-    // incl. limiter
     real wadjust = Oxyz + (c1o1 - Oxyz) * abs(m_111) / (abs(m_111) + quadricLimitD);
     m_111 += wadjust * (-m_111);
     wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + quadricLimitP);
@@ -459,6 +540,7 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     c_121 = -O4 * (c1o1 / omega - c1o2) * Dxz           * c1o3 * factorB + (c1o1 - O4) * (c_121);
     c_211 = -O4 * (c1o1 / omega - c1o2) * Dyz           * c1o3 * factorB + (c1o1 - O4) * (c_211);
 
+
     //////////////////////////////////////////////////////////////////////////
     // 5.
     c_122 += O5 * (-c_122);
@@ -503,17 +585,17 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     //////////////////////////////////////////////////////////////////////////
     // 6.
     m_222 = c_222 - ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
-                       c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
-                       c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
-                       oneOverRho +
-                      (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
-                       c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
-                       oneOverRho * oneOverRho -
-                       c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
-                      (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
-                       (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
-                       oneOverRho * oneOverRho * c2o3 +
-                       c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
+                    c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
+                    c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
+                    oneOverRho +
+                    (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
+                    c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
+                    oneOverRho * oneOverRho -
+                    c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
+                    (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
+                    (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
+                    oneOverRho * oneOverRho * c2o3 +
+                    c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
 
     ////////////////////////////////////////////////////////////////////////////////////
     //! -  Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in
@@ -524,6 +606,15 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     m_010 = -m_010;
     m_001 = -m_001;
 
+    //Write to array here to distribute read/write
+    if(writeMacroscopicVariables)
+    {
+        rho[k_000] = drho;
+        vx[k_000] = vvx;
+        vy[k_000] = vvy;
+        vz[k_000] = vvz;
+    }
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
@@ -573,31 +664,63 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned(
     //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    (dist.f[DIR_P00])[k_000] = f_M00;
-    (dist.f[DIR_M00])[k_M00] = f_P00;
-    (dist.f[DIR_0P0])[k_000] = f_0M0;
-    (dist.f[DIR_0M0])[k_0M0] = f_0P0;
-    (dist.f[DIR_00P])[k_000] = f_00M;
-    (dist.f[DIR_00M])[k_00M] = f_00P;
-    (dist.f[DIR_PP0])[k_000] = f_MM0;
-    (dist.f[DIR_MM0])[k_MM0] = f_PP0;
-    (dist.f[DIR_PM0])[k_0M0] = f_MP0;
-    (dist.f[DIR_MP0])[k_M00] = f_PM0;
-    (dist.f[DIR_P0P])[k_000] = f_M0M;
-    (dist.f[DIR_M0M])[k_M0M] = f_P0P;
-    (dist.f[DIR_P0M])[k_00M] = f_M0P;
-    (dist.f[DIR_M0P])[k_M00] = f_P0M;
-    (dist.f[DIR_0PP])[k_000] = f_0MM;
-    (dist.f[DIR_0MM])[k_0MM] = f_0PP;
-    (dist.f[DIR_0PM])[k_00M] = f_0MP;
-    (dist.f[DIR_0MP])[k_0M0] = f_0PM;
+    (dist.f[DIR_P00])[k_000]    = f_M00;
+    (dist.f[DIR_M00])[k_M00]    = f_P00;
+    (dist.f[DIR_0P0])[k_000]    = f_0M0;
+    (dist.f[DIR_0M0])[k_0M0]    = f_0P0;
+    (dist.f[DIR_00P])[k_000]    = f_00M;
+    (dist.f[DIR_00M])[k_00M]    = f_00P;
+    (dist.f[DIR_PP0])[k_000]   = f_MM0;
+    (dist.f[DIR_MM0])[k_MM0]   = f_PP0;
+    (dist.f[DIR_PM0])[k_0M0]   = f_MP0;
+    (dist.f[DIR_MP0])[k_M00]   = f_PM0;
+    (dist.f[DIR_P0P])[k_000]   = f_M0M;
+    (dist.f[DIR_M0M])[k_M0M]   = f_P0P;
+    (dist.f[DIR_P0M])[k_00M]   = f_M0P;
+    (dist.f[DIR_M0P])[k_M00]   = f_P0M;
+    (dist.f[DIR_0PP])[k_000]   = f_0MM;
+    (dist.f[DIR_0MM])[k_0MM]   = f_0PP;
+    (dist.f[DIR_0PM])[k_00M]   = f_0MP;
+    (dist.f[DIR_0MP])[k_0M0]   = f_0PM;
     (dist.f[DIR_000])[k_000] = f_000;
-    (dist.f[DIR_PPP])[k_000] = f_MMM;
-    (dist.f[DIR_PMP])[k_0M0] = f_MPM;
-    (dist.f[DIR_PPM])[k_00M] = f_MMP;
-    (dist.f[DIR_PMM])[k_0MM] = f_MPP;
-    (dist.f[DIR_MPP])[k_M00] = f_PMM;
-    (dist.f[DIR_MMP])[k_MM0] = f_PPM;
-    (dist.f[DIR_MPM])[k_M0M] = f_PMP;
-    (dist.f[DIR_MMM])[k_MMM] = f_PPP;
-}
\ No newline at end of file
+    (dist.f[DIR_PPP])[k_000]  = f_MMM;
+    (dist.f[DIR_PMP])[k_0M0]  = f_MPM;
+    (dist.f[DIR_PPM])[k_00M]  = f_MMP;
+    (dist.f[DIR_PMM])[k_0MM]  = f_MPP;
+    (dist.f[DIR_MPP])[k_M00]  = f_PMM;
+    (dist.f[DIR_MMP])[k_MM0]  = f_PPM;
+    (dist.f[DIR_MPM])[k_M0M]  = f_PMP;
+    (dist.f[DIR_MMM])[k_MMM]  = f_PPP;
+}
+// Explicit instantiations of LB_Kernel_CumulantK17 for all 16 <TurbulenceModel, writeMacroscopicVariables, applyBodyForce> combinations; first group: <*, true, true>.
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+// Second group: writeMacroscopicVariables = true, applyBodyForce = false.
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+// Third group: writeMacroscopicVariables = false, applyBodyForce = true.
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+// Fourth group: writeMacroscopicVariables = false, applyBodyForce = false.
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..da576618d1b08b55629c3c65fc115ceb822c8f7e
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
@@ -0,0 +1,29 @@
+#ifndef LB_Kernel_CUMULANT_K17_H
+#define LB_Kernel_CUMULANT_K17_H
+
+#include <DataTypes.h>  // NOTE(review): TurbulenceModel is used below but no include visible here obviously declares it - confirm it is reachable via this header
+#include <curand.h>  // NOTE(review): no curand symbol appears in this declaration; the include may be unnecessary - confirm
+// Declaration of the templated CUDA kernel LB_Kernel_CumulantK17; the definition file explicitly instantiates it for AMD, Smagorinsky, QR and None with every combination of the two bool flags.
+template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce > __global__ void LB_Kernel_CumulantK17(
+    real omega_in,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    real* distributions,  // NOTE(review): presumably the base pointer of the distribution arrays the kernel reads and writes - confirm in the definition
+    real* rho,  // receives drho at the node's own index when writeMacroscopicVariables is true
+    real* vx,  // receives vvx at the node's own index when writeMacroscopicVariables is true
+    real* vy,  // receives vvy at the node's own index when writeMacroscopicVariables is true
+    real* vz,  // receives vvz at the node's own index when writeMacroscopicVariables is true
+    real* turbulentViscosity,
+    real SGSconstant,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    real* bodyForceX,  // NOTE(review): bodyForceX/Y/Z are presumably only used when applyBodyForce is true - confirm in the definition
+    real* bodyForceY,
+    real* bodyForceZ,
+    real* quadricLimiters,
+    bool isEvenTimestep,
+    const uint *fluidNodeIndices,  // NOTE(review): presumably the list of node indices the kernel processes - confirm in the definition
+    uint numberOfFluidNodes);  // NOTE(review): presumably the length of fluidNodeIndices - confirm in the definition
+#endif // LB_Kernel_CUMULANT_K17_H
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
index 72d13282fc604dddcfa84682425a7a1829855ea0..b9e25494490507bde5a6aa7d6dd588ac1a1f6c87 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
@@ -17,17 +17,18 @@ void CumulantK17BulkComp::run()
 	dim3 grid(Grid, 1, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_CumulantK17BulkComp << < grid, threads >> >(	para->getParD(level)->omega,
-																	para->getParD(level)->typeOfGridNode,
-																	para->getParD(level)->neighborX,
-																	para->getParD(level)->neighborY,
-																	para->getParD(level)->neighborZ,
-																	para->getParD(level)->distributions.f[0],
-																	para->getParD(level)->numberOfNodes,
-																	level,
-																	para->getForcesDev(),
-                                                                    para->getQuadricLimitersDev(),
-																	para->getParD(level)->isEvenTimestep);
+	LB_Kernel_CumulantK17BulkComp << < grid, threads >> >(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+		para->getQuadricLimitersDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_CumulantK17BulkComp execution failed");
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
index cec04116ae4b411b1b3816ff4a8cab606c92491e..b33a3c251b5fb0cde8b1da0fcce097f955353d69 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
index 6ef6b40d3b7079579f54ca68734deb274d0c1c3a..295804887f9c451120d463c7fcdd968bd2f24d12 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
@@ -31,15 +31,16 @@ CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int leve
 
 void CumulantK17Unified::run()
 {
-    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
-                                                 para->getParD(level)->typeOfGridNode,
-                                                 para->getParD(level)->neighborX,
-                                                 para->getParD(level)->neighborY,
-                                                 para->getParD(level)->neighborZ,
-                                                 para->getParD(level)->distributions.f[0],
-                                                 (int)para->getParD(level)->numberOfNodes,
-                                                 para->getParD(level)->forcing,
-                                                 para->getParD(level)->isEvenTimestep };
+    GPUKernelParameter kernelParameter{
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        (int)para->getParD(level)->numberOfNodes,
+        para->getParD(level)->forcing,
+        para->getParD(level)->isEvenTimestep };
 
     auto lambda = [] __device__(lbm::KernelParameter parameter) {
         return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK17);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
index 3eea267e55fee45111fb11cf1258559e2c3c63f2..a0db78d27b00372feab8490111183481abbec8b9 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
@@ -33,11 +33,12 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include "lbm/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
-#include "Kernel/Utilities/ChimeraTransformation.h"
+using namespace vf::gpu;
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LB_Kernel_CumulantK17CompChim(
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu
deleted file mode 100644
index 8c06b7117c8b1ef62b932a76bf5de0be2ae99b1c..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "CumulantK17CompChimRedesigned.h"
-
-#include "Parameter/Parameter.h"
-#include "Parameter/CudaStreamManager.h"
-#include "CumulantK17CompChimRedesigned_Device.cuh"
-
-#include <cuda.h>
-
-std::shared_ptr<CumulantK17CompChimRedesigned> CumulantK17CompChimRedesigned::getNewInstance(std::shared_ptr<Parameter> para,
-                                                                               int level)
-{
-    return std::shared_ptr<CumulantK17CompChimRedesigned>(new CumulantK17CompChimRedesigned(para, level));
-}
-
-void CumulantK17CompChimRedesigned::run()
-{
-    LB_Kernel_CumulantK17CompChimRedesigned <<< cudaGrid.grid, cudaGrid.threads >>>(
-        para->getParD(level)->omega,
-        para->getParD(level)->neighborX,
-        para->getParD(level)->neighborY,
-        para->getParD(level)->neighborZ,
-        para->getParD(level)->distributions.f[0],
-        para->getParD(level)->numberOfNodes,
-        level,
-        para->getForcesDev(),
-        para->getQuadricLimitersDev(),
-        para->getParD(level)->isEvenTimestep,
-        para->getParD(level)->fluidNodeIndices,
-        para->getParD(level)->numberOfFluidNodes);
-    getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
-}
-
-void CumulantK17CompChimRedesigned::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex)
-{
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
-
-    LB_Kernel_CumulantK17CompChimRedesigned<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>(
-        para->getParD(level)->omega, 
-        para->getParD(level)->neighborX, 
-        para->getParD(level)->neighborY,
-        para->getParD(level)->neighborZ, 
-        para->getParD(level)->distributions.f[0], 
-        para->getParD(level)->numberOfNodes, 
-        level,
-        para->getForcesDev(), 
-        para->getQuadricLimitersDev(),
-        para->getParD(level)->isEvenTimestep,
-        indices,
-        size_indices);
-    getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
-    
-}
-
-CumulantK17CompChimRedesigned::CumulantK17CompChimRedesigned(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
-{
-    myPreProcessorTypes.push_back(InitCompSP27);
-    myKernelGroup = BasicKernel;
-    this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
-    this->kernelUsesFluidNodeIndices = true;
-}
-
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h
deleted file mode 100644
index 4658075de330665fdba88a5ec8149a9b476d5ac7..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef CUMULANT_K17_COMP_CHIM_REDESIGN_H
-#define CUMULANT_K17_COMP_CHIM_REDESIGN_H
-
-#include "Kernel/KernelImp.h"
-
-class CumulantK17CompChimRedesigned : public KernelImp
-{
-public:
-    static std::shared_ptr<CumulantK17CompChimRedesigned> getNewInstance(std::shared_ptr<Parameter> para, int level);
-	void run() override;
-    void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override;
-
-private:
-    CumulantK17CompChimRedesigned();
-    CumulantK17CompChimRedesigned(std::shared_ptr<Parameter> para, int level);
-};
-
-#endif 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh
deleted file mode 100644
index 00628efc76447a09504d2fd32a26a63a4d611c66..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef LB_Kernel_CUMULANT_K17_COMP_CHIM_REDESIGN_H
-#define LB_Kernel_CUMULANT_K17_COMP_CHIM_REDESIGN_H
-
-#include <DataTypes.h>
-#include <curand.h>
-
-__global__ void LB_Kernel_CumulantK17CompChimRedesigned(
-    real omega,
-    uint* neighborX,
-    uint* neighborY,
-    uint* neighborZ,
-    real* distributions,
-    unsigned long numberOfLBnodes,
-    int level,
-    real* forces,
-    real* quadricLimiters,
-    bool isEvenTimestep,
-    const uint* fluidNodeIndices,
-    uint numberOfFluidNodes);
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
deleted file mode 100644
index 6fae9f6d4845019afd363790eea0ee17c69a060f..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "CumulantK17CompChimStream.h"
-
-#include "Parameter/Parameter.h"
-#include "Parameter/CudaStreamManager.h"
-#include "CumulantK17CompChimStream_Device.cuh"
-
-#include <cuda.h>
-
-std::shared_ptr<CumulantK17CompChimStream> CumulantK17CompChimStream::getNewInstance(std::shared_ptr<Parameter> para,
-                                                                               int level)
-{
-    return std::shared_ptr<CumulantK17CompChimStream>(new CumulantK17CompChimStream(para, level));
-}
-
-void CumulantK17CompChimStream::run()
-{
-    LB_Kernel_CumulantK17CompChimStream <<< cudaGrid.grid, cudaGrid.threads >>>(
-        para->getParD(level)->omega,
-        para->getParD(level)->neighborX,
-        para->getParD(level)->neighborY,
-        para->getParD(level)->neighborZ,
-        para->getParD(level)->distributions.f[0],
-        para->getParD(level)->numberOfNodes,
-        level,
-        para->getForcesDev(),
-        para->getQuadricLimitersDev(),
-        para->getParD(level)->isEvenTimestep,
-        para->getParD(level)->fluidNodeIndices,
-        para->getParD(level)->numberOfFluidNodes);
-    getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
-}
-
-void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex)
-{
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
-
-    LB_Kernel_CumulantK17CompChimStream<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>(
-        para->getParD(level)->omega, 
-        para->getParD(level)->neighborX, 
-        para->getParD(level)->neighborY,
-        para->getParD(level)->neighborZ, 
-        para->getParD(level)->distributions.f[0], 
-        para->getParD(level)->numberOfNodes, 
-        level,
-        para->getForcesDev(), 
-        para->getQuadricLimitersDev(), 
-        para->getParD(level)->isEvenTimestep,
-        indices,
-        size_indices);
-    getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
-    
-}
-
-CumulantK17CompChimStream::CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
-{
-    myPreProcessorTypes.push_back(InitCompSP27);
-    myKernelGroup = BasicKernel;
-    this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
-    this->kernelUsesFluidNodeIndices = true;
-}
-
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h
deleted file mode 100644
index 325826e04c893b7c56b7f00bb2503a4eb1fda441..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef CUMULANT_K17_COMP_CHIM_SPARSE_H
-#define CUMULANT_K17_COMP_CHIM_SPARSE_H
-
-#include "Kernel/KernelImp.h"
-
-class CumulantK17CompChimStream : public KernelImp
-{
-public:
-    static std::shared_ptr<CumulantK17CompChimStream> getNewInstance(std::shared_ptr<Parameter> para, int level);
-	void run() override;
-    void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override;
-
-private:
-    CumulantK17CompChimStream();
-    CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level);
-};
-
-#endif 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
deleted file mode 100644
index 830fcc6c328f2ecd0f626539040868696065065f..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
+++ /dev/null
@@ -1,640 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file Cumulant27chimStream.cu
-//! \ingroup GPU
-//! \author Martin Schoenherr, Anna Wellmann
-//=======================================================================================
-/* Device code */
-#include "LBM/LB.h" 
-#include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-
-using namespace vf::lbm::constant;
-using namespace vf::lbm::dir;
-#include "Kernel/Utilities/ChimeraTransformation.h"
-
-////////////////////////////////////////////////////////////////////////////////
-__global__ void LB_Kernel_CumulantK17CompChimStream(
-	real omega,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
-	unsigned long size_Mat,
-	int level,
-	real* forces,
-	real* quadricLimiters,
-	bool isEvenTimestep,
-    const uint *fluidNodeIndices, 
-    uint numberOfFluidNodes)
-{
-    //////////////////////////////////////////////////////////////////////////
-    //! Cumulant K17 Kernel is based on \ref
-    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-    //! ]</b></a> and \ref <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017),
-    //! DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
-    //!
-    //! The cumulant kernel is executed in the following steps
-    //!
-    ////////////////////////////////////////////////////////////////////////////////
-    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-    //!
-    const unsigned x = threadIdx.x;
-    const unsigned y = blockIdx.x;
-    const unsigned z = blockIdx.y;
-
-    const unsigned nx = blockDim.x;
-    const unsigned ny = gridDim.x;
-
-    const unsigned k_thread = nx * (ny * z + y) + x;
-
-    //////////////////////////////////////////////////////////////////////////
-    // run for all indices in fluidNodeIndices
-    if (k_thread < numberOfFluidNodes) {
-        //////////////////////////////////////////////////////////////////////////
-        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
-        //! timestep is based on the esoteric twist algorithm \ref <a
-        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
-        //! DOI:10.3390/computation5020019 ]</b></a>
-
-        const unsigned k = fluidNodeIndices[k_thread];
-
-        Distributions27 dist;
-        if (isEvenTimestep) {
-            dist.f[DIR_P00]    = &distributions[DIR_P00 * size_Mat];
-            dist.f[DIR_M00]    = &distributions[DIR_M00 * size_Mat];
-            dist.f[DIR_0P0]    = &distributions[DIR_0P0 * size_Mat];
-            dist.f[DIR_0M0]    = &distributions[DIR_0M0 * size_Mat];
-            dist.f[DIR_00P]    = &distributions[DIR_00P * size_Mat];
-            dist.f[DIR_00M]    = &distributions[DIR_00M * size_Mat];
-            dist.f[DIR_PP0]   = &distributions[DIR_PP0 * size_Mat];
-            dist.f[DIR_MM0]   = &distributions[DIR_MM0 * size_Mat];
-            dist.f[DIR_PM0]   = &distributions[DIR_PM0 * size_Mat];
-            dist.f[DIR_MP0]   = &distributions[DIR_MP0 * size_Mat];
-            dist.f[DIR_P0P]   = &distributions[DIR_P0P * size_Mat];
-            dist.f[DIR_M0M]   = &distributions[DIR_M0M * size_Mat];
-            dist.f[DIR_P0M]   = &distributions[DIR_P0M * size_Mat];
-            dist.f[DIR_M0P]   = &distributions[DIR_M0P * size_Mat];
-            dist.f[DIR_0PP]   = &distributions[DIR_0PP * size_Mat];
-            dist.f[DIR_0MM]   = &distributions[DIR_0MM * size_Mat];
-            dist.f[DIR_0PM]   = &distributions[DIR_0PM * size_Mat];
-            dist.f[DIR_0MP]   = &distributions[DIR_0MP * size_Mat];
-            dist.f[DIR_000] = &distributions[DIR_000 * size_Mat];
-            dist.f[DIR_PPP]  = &distributions[DIR_PPP * size_Mat];
-            dist.f[DIR_MMP]  = &distributions[DIR_MMP * size_Mat];
-            dist.f[DIR_PMP]  = &distributions[DIR_PMP * size_Mat];
-            dist.f[DIR_MPP]  = &distributions[DIR_MPP * size_Mat];
-            dist.f[DIR_PPM]  = &distributions[DIR_PPM * size_Mat];
-            dist.f[DIR_MMM]  = &distributions[DIR_MMM * size_Mat];
-            dist.f[DIR_PMM]  = &distributions[DIR_PMM * size_Mat];
-            dist.f[DIR_MPM]  = &distributions[DIR_MPM * size_Mat];
-        } else {
-            dist.f[DIR_M00]    = &distributions[DIR_P00 * size_Mat];
-            dist.f[DIR_P00]    = &distributions[DIR_M00 * size_Mat];
-            dist.f[DIR_0M0]    = &distributions[DIR_0P0 * size_Mat];
-            dist.f[DIR_0P0]    = &distributions[DIR_0M0 * size_Mat];
-            dist.f[DIR_00M]    = &distributions[DIR_00P * size_Mat];
-            dist.f[DIR_00P]    = &distributions[DIR_00M * size_Mat];
-            dist.f[DIR_MM0]   = &distributions[DIR_PP0 * size_Mat];
-            dist.f[DIR_PP0]   = &distributions[DIR_MM0 * size_Mat];
-            dist.f[DIR_MP0]   = &distributions[DIR_PM0 * size_Mat];
-            dist.f[DIR_PM0]   = &distributions[DIR_MP0 * size_Mat];
-            dist.f[DIR_M0M]   = &distributions[DIR_P0P * size_Mat];
-            dist.f[DIR_P0P]   = &distributions[DIR_M0M * size_Mat];
-            dist.f[DIR_M0P]   = &distributions[DIR_P0M * size_Mat];
-            dist.f[DIR_P0M]   = &distributions[DIR_M0P * size_Mat];
-            dist.f[DIR_0MM]   = &distributions[DIR_0PP * size_Mat];
-            dist.f[DIR_0PP]   = &distributions[DIR_0MM * size_Mat];
-            dist.f[DIR_0MP]   = &distributions[DIR_0PM * size_Mat];
-            dist.f[DIR_0PM]   = &distributions[DIR_0MP * size_Mat];
-            dist.f[DIR_000] = &distributions[DIR_000 * size_Mat];
-            dist.f[DIR_MMM]  = &distributions[DIR_PPP * size_Mat];
-            dist.f[DIR_PPM]  = &distributions[DIR_MMP * size_Mat];
-            dist.f[DIR_MPM]  = &distributions[DIR_PMP * size_Mat];
-            dist.f[DIR_PMM]  = &distributions[DIR_MPP * size_Mat];
-            dist.f[DIR_MMP]  = &distributions[DIR_PPM * size_Mat];
-            dist.f[DIR_PPP]  = &distributions[DIR_MMM * size_Mat];
-            dist.f[DIR_MPP]  = &distributions[DIR_PMM * size_Mat];
-            dist.f[DIR_PMP]  = &distributions[DIR_MPM * size_Mat];
-        }
-        ////////////////////////////////////////////////////////////////////////////////
-        //! - Set neighbor indices (necessary for indirect addressing)
-        uint kw   = neighborX[k];
-        uint ks   = neighborY[k];
-        uint kb   = neighborZ[k];
-        uint ksw  = neighborY[kw];
-        uint kbw  = neighborZ[kw];
-        uint kbs  = neighborZ[ks];
-        uint kbsw = neighborZ[ksw];
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Set local distributions
-        //!
-        real mfcbb = (dist.f[DIR_P00])[k];
-        real mfabb = (dist.f[DIR_M00])[kw];
-        real mfbcb = (dist.f[DIR_0P0])[k];
-        real mfbab = (dist.f[DIR_0M0])[ks];
-        real mfbbc = (dist.f[DIR_00P])[k];
-        real mfbba = (dist.f[DIR_00M])[kb];
-        real mfccb = (dist.f[DIR_PP0])[k];
-        real mfaab = (dist.f[DIR_MM0])[ksw];
-        real mfcab = (dist.f[DIR_PM0])[ks];
-        real mfacb = (dist.f[DIR_MP0])[kw];
-        real mfcbc = (dist.f[DIR_P0P])[k];
-        real mfaba = (dist.f[DIR_M0M])[kbw];
-        real mfcba = (dist.f[DIR_P0M])[kb];
-        real mfabc = (dist.f[DIR_M0P])[kw];
-        real mfbcc = (dist.f[DIR_0PP])[k];
-        real mfbaa = (dist.f[DIR_0MM])[kbs];
-        real mfbca = (dist.f[DIR_0PM])[kb];
-        real mfbac = (dist.f[DIR_0MP])[ks];
-        real mfbbb = (dist.f[DIR_000])[k];
-        real mfccc = (dist.f[DIR_PPP])[k];
-        real mfaac = (dist.f[DIR_MMP])[ksw];
-        real mfcac = (dist.f[DIR_PMP])[ks];
-        real mfacc = (dist.f[DIR_MPP])[kw];
-        real mfcca = (dist.f[DIR_PPM])[kb];
-        real mfaaa = (dist.f[DIR_MMM])[kbsw];
-        real mfcaa = (dist.f[DIR_PMM])[kbs];
-        real mfaca = (dist.f[DIR_MPM])[kbw];
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-                     (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) +
-                      ((mfacb + mfcab) + (mfaab + mfccb))) +
-                     ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) +
-                    mfbbb;
-
-        real rho   = c1o1 + drho;
-        real OOrho = c1o1 / rho;
-
-        real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb)) *
-                   OOrho;
-        real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-                    (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab)) *
-                   OOrho;
-        real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-                    (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba)) *
-                   OOrho;
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        real factor = c1o1;
-        for (size_t i = 1; i <= level; i++) {
-            factor *= c2o1;
-        }
-        real fx = forces[0] / factor;
-        real fy = forces[1] / factor;
-        real fz = forces[2] / factor;
-        vvx += fx * c1o2;
-        vvy += fy * c1o2;
-        vvz += fz * c1o2;
-        ////////////////////////////////////////////////////////////////////////////////////
-        // calculate the square of velocities for this lattice node
-        real vx2 = vvx * vvx;
-        real vy2 = vvy * vvy;
-        real vz2 = vvz * vvz;
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to
-        //! section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        real wadjust;
-        real qudricLimitP = quadricLimiters[0];
-        real qudricLimitM = quadricLimiters[1];
-        real qudricLimitD = quadricLimiters[2];
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (6)-(14) in \ref <a
-        //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-        //! ]</b></a>
-        //!
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Z - Dir
-        forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-        forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Y - Dir
-        forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
-        forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-        forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
-        forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
-        forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-        forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-        forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
-        forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-        forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // X - Dir
-        forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-        forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-        forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
-        forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-        forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-        forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-        forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
-        forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-        forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations
-        //! according to <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!  => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE].
-        //!  - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk
-        //!  viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$.
-        //!  - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz
-        //!  \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$.
-        //!  - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz
-        //!  \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$.
-        //!  - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with
-        //!  simplifications assuming \f$ \omega_2 = 1.0\f$  (modify for different bulk viscosity).
-        //!  - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification
-        //!  all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$.
-        //!  - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$.
-        //!  - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$.
-        //!
-        ////////////////////////////////////////////////////////////
-        // 2.
-        real OxxPyyPzz = c1o1;
-        ////////////////////////////////////////////////////////////
-        // 3.
-        real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
-        real OxyyMxzz =
-            c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
-        real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
-                    (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
-        ////////////////////////////////////////////////////////////
-        // 4.
-        real O4 = c1o1;
-        ////////////////////////////////////////////////////////////
-        // 5.
-        real O5 = c1o1;
-        ////////////////////////////////////////////////////////////
-        // 6.
-        real O6 = c1o1;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - A and DIR_00M: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
-        //! different bulk viscosity).
-        //!
-        real factorA = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega);
-        real factorB = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute cumulants from central moments according to Eq. (20)-(23) in
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        ////////////////////////////////////////////////////////////
-        // 4.
-        real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
-        real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
-        real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
-
-        real CUMcca =
-            mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho));
-        real CUMcac =
-            mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho));
-        real CUMacc =
-            mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho));
-        ////////////////////////////////////////////////////////////
-        // 5.
-        real CUMbcc =
-            mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
-                     c1o3 * (mfbca + mfbac)) *
-                        OOrho;
-        real CUMcbc =
-            mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
-                     c1o3 * (mfcba + mfabc)) *
-                        OOrho;
-        real CUMccb =
-            mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
-                     c1o3 * (mfacb + mfcab)) *
-                        OOrho;
-        ////////////////////////////////////////////////////////////
-        // 6.
-        real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                                c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                                c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
-                                   OOrho +
-                               (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                                c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
-                                   OOrho * OOrho -
-                               c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                               (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
-                                (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
-                                   OOrho * OOrho * c2o3 +
-                               c1o27 * ((drho * drho - drho) * OOrho * OOrho));
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute linear combinations of second and third order cumulants
-        //!
-        ////////////////////////////////////////////////////////////
-        // 2.
-        real mxxPyyPzz = mfcaa + mfaca + mfaac;
-        real mxxMyy    = mfcaa - mfaca;
-        real mxxMzz    = mfcaa - mfaac;
-        ////////////////////////////////////////////////////////////
-        // 3.
-        real mxxyPyzz = mfcba + mfabc;
-        real mxxyMyzz = mfcba - mfabc;
-
-        real mxxzPyyz = mfcab + mfacb;
-        real mxxzMyyz = mfcab - mfacb;
-
-        real mxyyPxzz = mfbca + mfbac;
-        real mxyyMxzz = mfbca - mfbac;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // incl. correction
-        ////////////////////////////////////////////////////////////
-        //! - Compute velocity  gradients from second order cumulants according to Eq. (27)-(32)
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> Further explanations of the correction in viscosity in Appendix H of
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times
-        //! the gradients later.
-        //!
-        real Dxy  = -c3o1 * omega * mfbba;
-        real Dxz  = -c3o1 * omega * mfbab;
-        real Dyz  = -c3o1 * omega * mfabb;
-        real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-        real dyuy = dxux + omega * c3o2 * mxxMyy;
-        real dzuz = dxux + omega * c3o2 * mxxMzz;
-        ////////////////////////////////////////////////////////////
-        //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        mxxPyyPzz +=
-            OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
-        mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-        mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        ////no correction
-        // mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);
-        // mxxMyy += -(-omega) * (-mxxMyy);
-        // mxxMzz += -(-omega) * (-mxxMzz);
-        //////////////////////////////////////////////////////////////////////////
-        mfabb += omega * (-mfabb);
-        mfbab += omega * (-mfbab);
-        mfbba += omega * (-mfbba);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // relax
-        //////////////////////////////////////////////////////////////////////////
-        // incl. limiter
-        //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
-        mfbbb += wadjust * (-mfbbb);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
-        mxxyPyzz += wadjust * (-mxxyPyzz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
-        mxxyMyzz += wadjust * (-mxxyMyzz);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
-        mxxzPyyz += wadjust * (-mxxzPyyz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
-        mxxzMyyz += wadjust * (-mxxzMyyz);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
-        mxyyPxzz += wadjust * (-mxyyPxzz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
-        mxyyMxzz += wadjust * (-mxyyMxzz);
-        //////////////////////////////////////////////////////////////////////////
-        // no limiter
-        // mfbbb += OxyyMxzz * (-mfbbb);
-        // mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
-        // mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
-        // mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
-        // mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
-        // mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
-        // mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute inverse linear combinations of second and third order cumulants
-        //!
-        mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-        mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
-        mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
-
-        mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
-        mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
-        mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
-        mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
-        mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
-        mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
-        //////////////////////////////////////////////////////////////////////////
-
-        //////////////////////////////////////////////////////////////////////////
-        // 4.
-        // no limiter
-        //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according
-        //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMacc);
-        CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMcac);
-        CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (CUMcca);
-        CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * factorB + (c1o1 - O4) * (CUMbbc);
-        CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * factorB + (c1o1 - O4) * (CUMbcb);
-        CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * factorB + (c1o1 - O4) * (CUMcbb);
-
-        //////////////////////////////////////////////////////////////////////////
-        // 5.
-        CUMbcc += O5 * (-CUMbcc);
-        CUMcbc += O5 * (-CUMcbc);
-        CUMccb += O5 * (-CUMccb);
-
-        //////////////////////////////////////////////////////////////////////////
-        // 6.
-        CUMccc += O6 * (-CUMccc);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-
-        //////////////////////////////////////////////////////////////////////////
-        // 4.
-        mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
-        mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
-        mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
-
-        mfcca =
-            CUMcca +
-            (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
-        mfcac =
-            CUMcac +
-            (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9;
-        mfacc =
-            CUMacc +
-            (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
-
-        //////////////////////////////////////////////////////////////////////////
-        // 5.
-        mfbcc = CUMbcc + c1o3 *
-                             (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb +
-                                      c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
-                              (mfbca + mfbac)) *
-                             OOrho;
-        mfcbc = CUMcbc + c1o3 *
-                             (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb +
-                                      c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
-                              (mfcba + mfabc)) *
-                             OOrho;
-        mfccb = CUMccb + c1o3 *
-                             (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb +
-                                      c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
-                              (mfacb + mfcab)) *
-                             OOrho;
-
-        //////////////////////////////////////////////////////////////////////////
-        // 6.
-        mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                           c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                           c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
-                              OOrho +
-                          (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                           c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
-                              OOrho * OOrho -
-                          c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                          (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
-                           (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
-                              OOrho * OOrho * c2o3 +
-                          c1o27 * ((drho * drho - drho) * OOrho * OOrho));
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! -  Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        mfbaa = -mfbaa;
-        mfaba = -mfaba;
-        mfaab = -mfaab;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (88)-(96) in <a
-        //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-        //! ]</b></a>
-        //!
-        ////////////////////////////////////////////////////////////////////////////////////
-        // X - Dir
-        backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-        backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-        backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
-        backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-        backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-        backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-        backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
-        backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-        backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Y - Dir
-        backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
-        backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-        backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
-        backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
-        backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-        backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-        backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
-        backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-        backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Z - Dir
-        backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-        backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Write distributions: style of reading and writing the distributions from/to
-        //! stored arrays dependent on timestep is based on the esoteric twist algorithm
-        //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
-        //! DOI:10.3390/computation5020019 ]</b></a>
-        //!
-        (dist.f[DIR_P00])[k]      = mfabb;
-        (dist.f[DIR_M00])[kw]     = mfcbb;
-        (dist.f[DIR_0P0])[k]      = mfbab;
-        (dist.f[DIR_0M0])[ks]     = mfbcb;
-        (dist.f[DIR_00P])[k]      = mfbba;
-        (dist.f[DIR_00M])[kb]     = mfbbc;
-        (dist.f[DIR_PP0])[k]     = mfaab;
-        (dist.f[DIR_MM0])[ksw]   = mfccb;
-        (dist.f[DIR_PM0])[ks]    = mfacb;
-        (dist.f[DIR_MP0])[kw]    = mfcab;
-        (dist.f[DIR_P0P])[k]     = mfaba;
-        (dist.f[DIR_M0M])[kbw]   = mfcbc;
-        (dist.f[DIR_P0M])[kb]    = mfabc;
-        (dist.f[DIR_M0P])[kw]    = mfcba;
-        (dist.f[DIR_0PP])[k]     = mfbaa;
-        (dist.f[DIR_0MM])[kbs]   = mfbcc;
-        (dist.f[DIR_0PM])[kb]    = mfbac;
-        (dist.f[DIR_0MP])[ks]    = mfbca;
-        (dist.f[DIR_000])[k]   = mfbbb;
-        (dist.f[DIR_PPP])[k]    = mfaaa;
-        (dist.f[DIR_PMP])[ks]   = mfaca;
-        (dist.f[DIR_PPM])[kb]   = mfaac;
-        (dist.f[DIR_PMM])[kbs]  = mfacc;
-        (dist.f[DIR_MPP])[kw]   = mfcaa;
-        (dist.f[DIR_MMP])[ksw]  = mfcca;
-        (dist.f[DIR_MPM])[kbw]  = mfcac;
-        (dist.f[DIR_MMM])[kbsw] = mfccc;
-    }
-}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
deleted file mode 100644
index f74192c0423ba9dc96820d7f46eecb9d49a39ed4..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef LB_Kernel_CUMULANT_K17_COMP_CHIM_SPARSE_H
-#define LB_Kernel_CUMULANT_K17_COMP_CHIM_SPARSE_H
-
-#include <DataTypes.h>
-#include <curand.h>
-
-__global__ void LB_Kernel_CumulantK17CompChimStream(
-	real omega,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
-	unsigned long size_Mat,
-	int level,
-	real* forces,
-	real* quadricLimiters,
-	bool isEvenTimestep,
-	const uint* fluidNodeIndices,
-	uint numberOfFluidNodes);
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
index 54af306039585f3beb39b05f2f2e0a96ae784e12..2e0af0bdb85d3f008768f9f430e8b4e5d9719b0f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
@@ -1,8 +1,8 @@
 #include "CumulantK18Comp.h"
 
 #include "CumulantK18Comp_Device.cuh"
-
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK18Comp> CumulantK18Comp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -11,37 +11,22 @@ std::shared_ptr<CumulantK18Comp> CumulantK18Comp::getNewInstance(std::shared_ptr
 
 void CumulantK18Comp::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK18Comp << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->g6.g[0],
-														para->getParD(level)->numberOfNodes,
-														level,
-														para->getForcesDev(),
-                                                        para->getQuadricLimitersDev(),
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK18Comp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+    // Launch on this level's device data; grid was constructed from the level's thread count and node count.
+    LB_Kernel_CumulantK18Comp <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // presumably DDStart in the kernel (offset there by DIR_* * size_Mat) - confirm
+        para->getParD(level)->g6.g[0], // presumably G6 in the kernel - confirm
+        para->getParD(level)->numberOfNodes, // same node count used to construct grid
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep); // presumably the kernel's EvenOrOdd flag selecting the pointer mapping - confirm
+    getLastCudaError("LB_Kernel_CumulantK18Comp execution failed"); // NOTE(review): helper not visible here; presumably reports a failed launch with this message
 }
 
 CumulantK18Comp::CumulantK18Comp(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
index bb42d113e47ce28f153ac295f2d9a934dd1b213a..0e4ae5caebb9bd4b1c889a78bfadb62487742c98 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
@@ -42,83 +42,83 @@ __global__ void LB_Kernel_CumulantK18Comp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 			}
 			else
 			{
-				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
index 0c1778dc39496c6564dedcbe1f6e818bee147191..d0d81eaac711d4d80284b66a1040e0e8404f5d4d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
@@ -1,8 +1,8 @@
 #include "CumulantK20Comp.h"
 
 #include "CumulantK20Comp_Device.cuh"
-
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK20Comp> CumulantK20Comp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -11,37 +11,22 @@ std::shared_ptr<CumulantK20Comp> CumulantK20Comp::getNewInstance(std::shared_ptr
 
 void CumulantK20Comp::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK20Comp << < grid, threads >> >(	para->getParD(level)->omega,
-																para->getParD(level)->typeOfGridNode,
-																para->getParD(level)->neighborX,
-																para->getParD(level)->neighborY,
-																para->getParD(level)->neighborZ,
-																para->getParD(level)->distributions.f[0],
-																para->getParD(level)->g6.g[0],
-																para->getParD(level)->numberOfNodes,
-																level,
-																para->getForcesDev(),
-                                                                para->getQuadricLimitersDev(),
-																para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK20Comp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+    // Launch on this level's device data; grid was constructed from the level's thread count and node count.
+    LB_Kernel_CumulantK20Comp <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // presumably DDStart in the kernel (offset there by DIR_* * size_Mat) - confirm
+        para->getParD(level)->g6.g[0], // presumably G6 in the kernel - confirm
+        para->getParD(level)->numberOfNodes, // same node count used to construct grid
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep); // presumably the kernel's EvenOrOdd flag selecting the pointer mapping - confirm
+    getLastCudaError("LB_Kernel_CumulantK20Comp execution failed"); // NOTE(review): helper not visible here; presumably reports a failed launch with this message
 }
 
 CumulantK20Comp::CumulantK20Comp(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
index c805fc293aeb8b182bb0e01df82b584da69d0175..2dbe0bb62412f9363fdd0e714f5da296f81ae5b3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
@@ -42,83 +42,83 @@ __global__ void LB_Kernel_CumulantK20Comp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 			}
 			else
 			{
-				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
index be94791572f739fb2eef7c049702caeedb6641fc..b576333f50304f5628e073d2eee16cf5b82c9d34 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "MRTCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<MRTCompSP27> MRTCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<MRTCompSP27> MRTCompSP27::getNewInstance(std::shared_ptr<Paramet
 
 void MRTCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_MRT_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_MRT_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_MRT_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_MRT_Comp_SP_27 execution failed");
 }
 
 MRTCompSP27::MRTCompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
index a9aefa2d62a962766470c93a62adeefa4f19570e..c3eb51a114e5c4a3be7605765d0889a7bae25cf0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca +
 				mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
index 558b4f333e7c92b372a5097aa4917dd6d1230a34..3be594e3e39a57cd71741cd060e9dddda15d6035 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
@@ -5,7 +5,7 @@
 #include <DataTypes.h>
 #include <cuda_runtime.h>
 
-#include <lbm/KernelParameter.h>
+#include "lbm/KernelParameter.h"
 
 #include "Kernel/Utilities/DistributionHelper.cuh"
 
@@ -23,7 +23,7 @@ struct GPUKernelParameter
     unsigned int* neighborY;
     unsigned int* neighborZ;
     real* distributions;
-    int size_Mat;
+    int numberOfLBnodes;
     real* forces;
     bool isEvenTimestep;
 };
@@ -31,19 +31,22 @@ struct GPUKernelParameter
 template<typename KernelFunctor>
 __global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter)
 {
-    const uint k = getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k >= kernelParameter.size_Mat)
+    if(nodeIndex >= kernelParameter.numberOfLBnodes)
         return;
 
-    if (!isValidFluidNode(kernelParameter.typeOfGridNode[k]))
+    if (!isValidFluidNode(kernelParameter.typeOfGridNode[nodeIndex]))
         return;
 
     DistributionWrapper distributionWrapper {
         kernelParameter.distributions,
-        (unsigned int)kernelParameter.size_Mat,
+        (unsigned int)kernelParameter.numberOfLBnodes,
         kernelParameter.isEvenTimestep,
-        k,
+        nodeIndex,
         kernelParameter.neighborX,
         kernelParameter.neighborY,
         kernelParameter.neighborZ
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
index 81655fac9cfd0b562ba60a5ee289fb64da5c1fba..3fb9be28654f83a7a98bb7d6b3a8a46e9170e7a8 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKIncompSP27> BGKIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKIncompSP27> BGKIncompSP27::getNewInstance(std::shared_ptr<Par
 
 void BGKIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_BGK_Incomp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_BGK_Incomp_SP_27 execution failed");
 }
 
 BGKIncompSP27::BGKIncompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
index 9a94006b8a1be745fc2bcfdd80e454152347139d..233595656720f5c84cf5be9e555565af0e9c95d0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
index 86b513f1252f2787abee637819e64606d111c4fa..f274f576a14fc193bcabd44d2c9078a2c98055bc 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKPlusIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKPlusIncompSP27> BGKPlusIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKPlusIncompSP27> BGKPlusIncompSP27::getNewInstance(std::shared
 
 void BGKPlusIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Plus_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_Plus_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_BGK_Plus_Incomp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_BGK_Plus_Incomp_SP_27 execution failed");
 }
 
 BGKPlusIncompSP27::BGKPlusIncompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
index 9355e42aa5b05190f063f5247d8d6c0dea787a02..b49b76c6224be4b3543c01647a6553e6fc64b74e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
index 05f374096c9c5da2460b32cf5ae8cb59cfa78382..3a6760b619d2ca1a7eb19771478eb9e5989ead0c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CascadeIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CascadeIncompSP27> CascadeIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CascadeIncompSP27> CascadeIncompSP27::getNewInstance(std::shared
 
 void CascadeIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cascade_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // Launches the cascade kernel for `level`; the launch dimensions are built from that level's thread count and node count (see cuda/CudaGrid.h).
+
+    LB_Kernel_Cascade_Incomp_SP_27 <<< grid.grid, grid.threads >>>( // execution configuration: grid dimensions first, block dimensions second
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX, // neighborX/Y/Z: per-level neighbor index arrays handed to the kernel
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // NOTE(review): presumably the base of the distribution storage the kernel offsets by direction * node count - confirm against the kernel signature
+        para->getParD(level)->numberOfNodes, // same node count that sized the launch above
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cascade_Incomp_SP_27 execution failed"); // NOTE(review): presumably checks the CUDA error state after the launch and reports it with this message - confirm against the helper's definition
 }
 
 CascadeIncompSP27::CascadeIncompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
index 92cc749b135739d5f38c9916c4ee0da7497e5f2d..8e607cabb4cc40bbb22c5ad3ec6db2c63154add6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
index 62768ef9948b6c259c5ad4005237081f4d255e73..44beb8507d5664f01283130dd3087a788e4491ed 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "Cumulant1hIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<Cumulant1hIncompSP27> Cumulant1hIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,38 +11,23 @@ std::shared_ptr<Cumulant1hIncompSP27> Cumulant1hIncompSP27::getNewInstance(std::
 
 void Cumulant1hIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cum_1h_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-													para->getParD(level)->deltaPhi,
-													para->getAngularVelocity(),
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->coordinateX,
-													para->getParD(level)->coordinateY,
-													para->getParD(level)->coordinateZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cum_1h_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // Launches the 1h cumulant kernel for `level`; the launch dimensions are built from that level's thread count and node count (see cuda/CudaGrid.h).
+
+    LB_Kernel_Cum_1h_Incomp_SP_27 <<< grid.grid, grid.threads >>>( // execution configuration: grid dimensions first, block dimensions second
+        para->getParD(level)->omega,
+        para->getParD(level)->deltaPhi,
+        para->getAngularVelocity(), // read from para directly rather than from the per-level data
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX, // neighborX/Y/Z: per-level neighbor index arrays handed to the kernel
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->coordinateX, // coordinateX/Y/Z: per-level node coordinate arrays handed to the kernel
+        para->getParD(level)->coordinateY,
+        para->getParD(level)->coordinateZ,
+        para->getParD(level)->distributions.f[0], // NOTE(review): presumably the base of the distribution storage the kernel offsets by direction * node count - confirm against the kernel signature
+        para->getParD(level)->numberOfNodes, // same node count that sized the launch above
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cum_1h_Incomp_SP_27 execution failed"); // NOTE(review): presumably checks the CUDA error state after the launch and reports it with this message - confirm against the helper's definition
 }
 
 Cumulant1hIncompSP27::Cumulant1hIncompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
index 0243046082ce1853011c6632d5a2f80364ebe0db..5130017acc642c92b064a500e79ff685ec2f6d97 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
@@ -42,63 +42,63 @@ __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -159,33 +159,33 @@ __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//Ship
 			real coord0X = 281.125f;//7.5f;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
index 6551e1bde300e3a4d2a4f50cefdfff258edfacee..3a740bef6d7fbaa2883b3d36930d49bf9bf0bb3e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantIsoIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantIsoIncompSP27> CumulantIsoIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<CumulantIsoIncompSP27> CumulantIsoIncompSP27::getNewInstance(std
 
 void CumulantIsoIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cum_IsoTest_Incomp_SP_27 << < grid, threads >> >(para->getParD(level)->omega,
-		para->getParD(level)->typeOfGridNode,
-		para->getParD(level)->neighborX,
-		para->getParD(level)->neighborY,
-		para->getParD(level)->neighborZ,
-		para->getParD(level)->distributions.f[0],
-		para->getParD(level)->dxxUx,
-		para->getParD(level)->dyyUy,
-		para->getParD(level)->dzzUz,
-		para->getParD(level)->numberOfNodes,
-		para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_Cum_IsoTest_Incomp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->dxxUx,
+        para->getParD(level)->dyyUy,
+        para->getParD(level)->dzzUz,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cum_IsoTest_Incomp_SP_27 execution failed");
 }
 
 CumulantIsoIncompSP27::CumulantIsoIncompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
index 64d697f2b0953cee75f4397e399a0e6128e486a2..1f0ef2ec84c8d4b9b4be57548bde396c3316a80d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
index 40cde56b007f70f98db13d5962f3e746b97637ef..7ae17b97170b4d8474acd6777f7c27411a962681 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantK15Incomp_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK15Incomp> CumulantK15Incomp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CumulantK15Incomp> CumulantK15Incomp::getNewInstance(std::shared
 
 void CumulantK15Incomp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK15Incomp <<< grid, threads >>>(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK15Incomp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_CumulantK15Incomp <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_CumulantK15Incomp execution failed");
 }
 
 CumulantK15Incomp::CumulantK15Incomp(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
index fc108ef1ef109a40735e250bd9a0f21491e4f977..01b60b3bf8067a81f99b912c4c0c700963f5448c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_CumulantK15Incomp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -154,33 +154,33 @@ __global__ void LB_Kernel_CumulantK15Incomp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
index c4311309e4653f2862e303dacb3e2d07646a5061..7645703e0d40176b136762d6b48633f4a9c0d950 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "MRTIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<MRTIncompSP27> MRTIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<MRTIncompSP27> MRTIncompSP27::getNewInstance(std::shared_ptr<Par
 
 void MRTIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_MRT_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_MRT_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_MRT_Incomp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_MRT_Incomp_SP_27 execution failed");
 }
 
 MRTIncompSP27::MRTIncompSP27(std::shared_ptr<Parameter> para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
index f6a283c2f9ba3c15729061ebeabcf34edd0abe97..a6663cc3c72696fda2ce9819203cd19195088730 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
index 77527d5bedab08fdcacb3a103727ae25274b2aa4..43724f9165e2bb8dca1705ae0053612df92413ec 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
@@ -11,7 +11,7 @@ std::shared_ptr<PMCumulantOneCompSP27> PMCumulantOneCompSP27::getNewInstance(std
 
 void PMCumulantOneCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	int Grid = (size_Mat / numberOfThreads) + 1;
@@ -30,7 +30,8 @@ void PMCumulantOneCompSP27::run()
 	dim3 threads(numberOfThreads, 1, 1);
 
 	for (int i = 0; i < pm.size(); i++) {
-		LB_Kernel_PM_Cum_One_Comp_SP_27 << < grid, threads >> >(para->getParD(level)->omega,
+		LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid, threads >>>(
+			para->getParD(level)->omega,
 			para->getParD(level)->neighborX,
 			para->getParD(level)->neighborY,
 			para->getParD(level)->neighborZ,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
index 89975d1663fb236295c22b81af4b0544ffc489bb..4f5f61f9d7a61fee8fd3438de5c588c861d8604c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
@@ -11,7 +11,7 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	real porosity,
@@ -24,63 +24,63 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	Distributions27 D;
 	if (EvenOrOdd == true)
 	{
-		D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-		D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-		D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-		D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-		D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-		D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-		D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-		D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-		D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+		D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+		D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 	}
 
 	////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
index 6533c604f32a478cdc6a097e4dd7d0b56e48150d..f2cf530b5d331c71d4a13bd5882a3657a3bbddea 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
@@ -9,7 +9,7 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	real porosity,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu
deleted file mode 100644
index a9d518d14a286ae3f6b565176969162994afa269..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "TurbulentViscosityCumulantK17CompChim.h"
-#include "cuda/CudaGrid.h"
-#include <logger/Logger.h>
-#include "Parameter/Parameter.h"
-#include "TurbulentViscosityCumulantK17CompChim_Device.cuh"
-
-template<TurbulenceModel turbulenceModel> 
-std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > TurbulentViscosityCumulantK17CompChim<turbulenceModel>::getNewInstance(std::shared_ptr<Parameter> para, int level)
-{
-	return std::shared_ptr<TurbulentViscosityCumulantK17CompChim<turbulenceModel> >(new TurbulentViscosityCumulantK17CompChim<turbulenceModel>(para,level));
-}
-
-template<TurbulenceModel turbulenceModel>
-void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::run()
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, para->getParH(level)->numberOfNodes);
-
-	LB_Kernel_TurbulentViscosityCumulantK17CompChim < turbulenceModel  > <<< grid.grid, grid.threads >>>(   para->getParD(level)->omega, 	
-																											para->getParD(level)->typeOfGridNode, 										para->getParD(level)->neighborX,	
-																											para->getParD(level)->neighborY,	
-																											para->getParD(level)->neighborZ,	
-																											para->getParD(level)->distributions.f[0],	
-																											para->getParD(level)->rho,		
-																											para->getParD(level)->velocityX,		
-																											para->getParD(level)->velocityY,	
-																											para->getParD(level)->velocityZ,	
-																											para->getParD(level)->turbViscosity,
-																											para->getSGSConstant(),
-																											(unsigned long)para->getParD(level)->numberOfNodes,	
-																											level,				
-																											para->getIsBodyForce(),				
-																											para->getForcesDev(),				
-																											para->getParD(level)->forceX_SP,	
-																											para->getParD(level)->forceY_SP,
-																											para->getParD(level)->forceZ_SP,
-																											para->getQuadricLimitersDev(),			
-																											para->getParD(level)->isEvenTimestep);
-
-	getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed");
-}
-
-template<TurbulenceModel turbulenceModel>
-TurbulentViscosityCumulantK17CompChim<turbulenceModel>::TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level)
-{
-	this->para = para;
-	this->level = level;
-
-	myPreProcessorTypes.push_back(InitCompSP27);
-
-	myKernelGroup = BasicKernel;
-
-	VF_LOG_INFO("Using turbulence model: {}", turbulenceModel);
-}
-
-template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::AMD>;
-template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::Smagorinsky>;
-template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::QR>;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h
deleted file mode 100644
index 0d35b68c916e54c6ec6eeeacd7189fe4d9a33c10..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef TurbulentViscosityCUMULANT_K17_COMP_CHIM_H
-#define TurbulentViscosityCUMULANT_K17_COMP_CHIM_H
-
-#include "Kernel/KernelImp.h"
-#include "Parameter/Parameter.h"
-
-template<TurbulenceModel turbulenceModel> 
-class TurbulentViscosityCumulantK17CompChim : public KernelImp
-{
-public:
-	static std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > getNewInstance(std::shared_ptr< Parameter> para, int level);
-	void run();
-
-private:
-    TurbulentViscosityCumulantK17CompChim();
-    TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level);
-};
-
-#endif 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
deleted file mode 100644
index 32350b95107b68103af0f238fefe095882919092..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
+++ /dev/null
@@ -1,687 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file TurbulentViscosityCumulantK17CompChim_Device.cu
-//! \author Henry Korb, Henrik Asmuth
-//! \date 16/05/2022
-//! \brief CumulantK17CompChim kernel by Martin SchÃ¶nherr that inlcudes turbulent viscosity and other small mods.
-//!
-//! Additions to CumulantK17CompChim:
-//!     - can incorporate local body force 
-//!     - when applying a local body force, the total round of error of forcing+bodyforce is saved and added in next time step
-//!     - uses turbulent viscosity that is computed in separate kernel (as of now AMD)
-//!     - saves macroscopic values (needed for instance for probes, AMD, and actuator models)
-//!
-//=======================================================================================
-/* Device code */
-#include "LBM/LB.h" 
-#include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-#include "Kernel/Utilities/DistributionHelper.cuh"
-
-#include "GPU/TurbulentViscosityInlines.cuh"
-
-using namespace vf::lbm::constant;
-using namespace vf::lbm::dir;
-#include "Kernel/Utilities/ChimeraTransformation.h"
-
-
-////////////////////////////////////////////////////////////////////////////////
-template<TurbulenceModel turbulenceModel>
-__global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
-	real omega_in,
-	uint* typeOfGridNode,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
-    real* rho,
-    real* vx,
-    real* vy,
-    real* vz,
-    real* turbulentViscosity,
-    real SGSconstant,
-	unsigned long size_Mat,
-	int level,
-    bool bodyForce,
-	real* forces,
-    real* bodyForceX,
-    real* bodyForceY,
-    real* bodyForceZ,
-	real* quadricLimiters,
-	bool isEvenTimestep)
-{
-    //////////////////////////////////////////////////////////////////////////
-    //! Cumulant K17 Kernel is based on \ref
-    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-    //! ]</b></a> and \ref <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017),
-    //! DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
-    //!
-    //! The cumulant kernel is executed in the following steps
-    //!
-    ////////////////////////////////////////////////////////////////////////////////
-    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-    //!
-    const unsigned k_000 = vf::gpu::getNodeIndex();
-
-    //////////////////////////////////////////////////////////////////////////
-    // run for all indices in size_Mat and fluid nodes
-    if ((k_000 < size_Mat) && (typeOfGridNode[k_000] == GEO_FLUID)) {
-        //////////////////////////////////////////////////////////////////////////
-        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
-        //! timestep is based on the esoteric twist algorithm \ref <a
-        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
-        //! DOI:10.3390/computation5020019 ]</b></a>
-        //!
-        Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, size_Mat, isEvenTimestep);
-
-        ////////////////////////////////////////////////////////////////////////////////
-        //! - Set neighbor indices (necessary for indirect addressing)
-        uint k_M00 = neighborX[k_000];
-        uint k_0M0 = neighborY[k_000];
-        uint k_00M = neighborZ[k_000];
-        uint k_MM0 = neighborY[k_M00];
-        uint k_M0M = neighborZ[k_M00];
-        uint k_0MM = neighborZ[k_0M0];
-        uint k_MMM = neighborZ[k_MM0];
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Set local distributions
-        //!
-        real f_000 = (dist.f[DIR_000])[k_000];
-        real f_P00 = (dist.f[DIR_P00])[k_000];
-        real f_M00 = (dist.f[DIR_M00])[k_M00];
-        real f_0P0 = (dist.f[DIR_0P0])[k_000];
-        real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
-        real f_00P = (dist.f[DIR_00P])[k_000];
-        real f_00M = (dist.f[DIR_00M])[k_00M];
-        real f_PP0 = (dist.f[DIR_PP0])[k_000];
-        real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
-        real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
-        real f_MP0 = (dist.f[DIR_MP0])[k_M00];
-        real f_P0P = (dist.f[DIR_P0P])[k_000];
-        real f_M0M = (dist.f[DIR_M0M])[k_M0M];
-        real f_P0M = (dist.f[DIR_P0M])[k_00M];
-        real f_M0P = (dist.f[DIR_M0P])[k_M00];
-        real f_0PP = (dist.f[DIR_0PP])[k_000];
-        real f_0MM = (dist.f[DIR_0MM])[k_0MM];
-        real f_0PM = (dist.f[DIR_0PM])[k_00M];
-        real f_0MP = (dist.f[DIR_0MP])[k_0M0];
-        real f_PPP = (dist.f[DIR_PPP])[k_000];
-        real f_MPP = (dist.f[DIR_MPP])[k_M00];
-        real f_PMP = (dist.f[DIR_PMP])[k_0M0];
-        real f_MMP = (dist.f[DIR_MMP])[k_MM0];
-        real f_PPM = (dist.f[DIR_PPM])[k_00M];
-        real f_MPM = (dist.f[DIR_MPM])[k_M0M];
-        real f_PMM = (dist.f[DIR_PMM])[k_0MM];
-        real f_MMM = (dist.f[DIR_MMM])[k_MMM];
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Define aliases to use the same variable for the moments (m's):
-        //!
-        real& m_111 = f_000;
-        real& m_211 = f_P00;
-        real& m_011 = f_M00;
-        real& m_121 = f_0P0;
-        real& m_101 = f_0M0;
-        real& m_112 = f_00P;
-        real& m_110 = f_00M;
-        real& m_221 = f_PP0;
-        real& m_001 = f_MM0;
-        real& m_201 = f_PM0;
-        real& m_021 = f_MP0;
-        real& m_212 = f_P0P;
-        real& m_010 = f_M0M;
-        real& m_210 = f_P0M;
-        real& m_012 = f_M0P;
-        real& m_122 = f_0PP;
-        real& m_100 = f_0MM;
-        real& m_120 = f_0PM;
-        real& m_102 = f_0MP;
-        real& m_222 = f_PPP;
-        real& m_022 = f_MPP;
-        real& m_202 = f_PMP;
-        real& m_002 = f_MMP;
-        real& m_220 = f_PPM;
-        real& m_020 = f_MPM;
-        real& m_200 = f_PMM;
-        real& m_000 = f_MMM;
-
-        //////////////////////////////////////////////////////(unsigned long)//////////////////////////////
-        //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
-                    (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
-                    ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
-                    ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
-                        f_000;
-
-        real oneOverRho = c1o1 / (c1o1 + drho);
-
-        real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
-                    (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
-                oneOverRho;
-        real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
-                    (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
-                oneOverRho;
-        real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
-                    (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
-                oneOverRho;
-        
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        real factor = c1o1;
-        for (size_t i = 1; i <= level; i++) {
-            factor *= c2o1;
-        }
-        
-        real fx = forces[0];
-        real fy = forces[1];
-        real fz = forces[2];
-
-        if( bodyForce ){
-            fx += bodyForceX[k_000]; 
-            fy += bodyForceY[k_000];
-            fz += bodyForceZ[k_000];
-
-            real vx = vvx;
-            real vy = vvy;
-            real vz = vvz;
-            real acc_x = fx * c1o2 / factor;
-            real acc_y = fy * c1o2 / factor;
-            real acc_z = fz * c1o2 / factor;
-
-            vvx += acc_x;
-            vvy += acc_y;
-            vvz += acc_z;
-            
-        //    // Reset body force. To be used when not using round-off correction.
-        // bodyForceX[k] = 0.0f;
-        // bodyForceY[k] = 0.0f;
-        // bodyForceZ[k] = 0.0f;
-
-            ////////////////////////////////////////////////////////////////////////////////////
-            //!> Round-off correction
-            //!
-            //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
-            //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation.
-            //!> Seems to be necesseary at very high Re boundary layers, where the forcing and velocity can  
-            //!> differ by several orders of magnitude.
-            //!> \note 16/05/2022: Testing, still ongoing! 
-            //!
-            bodyForceX[k_000] = (acc_x-(vvx-vx))*factor*c2o1;
-            bodyForceY[k_000] = (acc_y-(vvy-vy))*factor*c2o1;
-            bodyForceZ[k_000] = (acc_z-(vvz-vz))*factor*c2o1;
-        }
-        else{
-            vvx += fx * c1o2 / factor;
-            vvy += fy * c1o2 / factor;
-            vvz += fz * c1o2 / factor;
-        }
-        
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // calculate the square of velocities for this lattice node
-        real vx2 = vvx * vvx;
-        real vy2 = vvy * vvy;
-        real vz2 = vvz * vvz;
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to
-        //! section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        real quadricLimitP = quadricLimiters[0];
-        real quadricLimitM = quadricLimiters[1];
-        real quadricLimitD = quadricLimiters[2];
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (6)-(14) in \ref <a
-        //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-        //! ]</b></a>
-        //!
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Z - Dir
-        forwardInverseChimeraWithK(f_MMM, f_MM0, f_MMP, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(f_M0M, f_M00, f_M0P, vvz, vz2, c9o1,  c1o9);
-        forwardInverseChimeraWithK(f_MPM, f_MP0, f_MPP, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(f_0MM, f_0M0, f_0MP, vvz, vz2, c9o1,  c1o9);
-        forwardInverseChimeraWithK(f_00M, f_000, f_00P, vvz, vz2, c9o4,  c4o9);
-        forwardInverseChimeraWithK(f_0PM, f_0P0, f_0PP, vvz, vz2, c9o1,  c1o9);
-        forwardInverseChimeraWithK(f_PMM, f_PM0, f_PMP, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(f_P0M, f_P00, f_P0P, vvz, vz2, c9o1,  c1o9);
-        forwardInverseChimeraWithK(f_PPM, f_PP0, f_PPP, vvz, vz2, c36o1, c1o36);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Y - Dir
-        forwardInverseChimeraWithK(f_MMM, f_M0M, f_MPM, vvy, vy2, c6o1,  c1o6);
-        forwardChimera(            f_MM0, f_M00, f_MP0, vvy, vy2);
-        forwardInverseChimeraWithK(f_MMP, f_M0P, f_MPP, vvy, vy2, c18o1, c1o18);
-        forwardInverseChimeraWithK(f_0MM, f_00M, f_0PM, vvy, vy2, c3o2,  c2o3);
-        forwardChimera(            f_0M0, f_000, f_0P0, vvy, vy2);
-        forwardInverseChimeraWithK(f_0MP, f_00P, f_0PP, vvy, vy2, c9o2,  c2o9);
-        forwardInverseChimeraWithK(f_PMM, f_P0M, f_PPM, vvy, vy2, c6o1,  c1o6);
-        forwardChimera(            f_PM0, f_P00, f_PP0, vvy, vy2);
-        forwardInverseChimeraWithK(f_PMP, f_P0P, f_PPP, vvy, vy2, c18o1, c1o18);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // X - Dir
-        forwardInverseChimeraWithK(f_MMM, f_0MM, f_PMM, vvx, vx2, c1o1, c1o1);
-        forwardChimera(            f_M0M, f_00M, f_P0M, vvx, vx2);
-        forwardInverseChimeraWithK(f_MPM, f_0PM, f_PPM, vvx, vx2, c3o1, c1o3);
-        forwardChimera(            f_MM0, f_0M0, f_PM0, vvx, vx2);
-        forwardChimera(            f_M00, f_000, f_P00, vvx, vx2);
-        forwardChimera(            f_MP0, f_0P0, f_PP0, vvx, vx2);
-        forwardInverseChimeraWithK(f_MMP, f_0MP, f_PMP, vvx, vx2, c3o1, c1o3);
-        forwardChimera(            f_M0P, f_00P, f_P0P, vvx, vx2);
-        forwardInverseChimeraWithK(f_MPP, f_0PP, f_PPP, vvx, vx2, c3o1, c1o9);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations
-        //! according to <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!  => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE].
-        //!  - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk
-        //!  viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$.
-        //!  - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz
-        //!  \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$.
-        //!  - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz
-        //!  \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$.
-        //!  - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with
-        //!  simplifications assuming \f$ \omega_2 = 1.0\f$  (modify for different bulk viscosity).
-        //!  - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification
-        //!  all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$.
-        //!  - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$.
-        //!  - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$.
-        //!
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Calculate modified omega with turbulent viscosity
-        //!
-        real omega = omega_in / (c1o1 + c3o1*omega_in*turbulentViscosity[k_000]);
-        ////////////////////////////////////////////////////////////
-        // 2.
-        real OxxPyyPzz = c1o1;
-        ////////////////////////////////////////////////////////////
-        // 3.
-        real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
-        real OxyyMxzz =
-            c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
-        real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
-                    (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
-        ////////////////////////////////////////////////////////////
-        // 4.
-        real O4 = c1o1;
-        ////////////////////////////////////////////////////////////
-        // 5.
-        real O5 = c1o1;
-        ////////////////////////////////////////////////////////////
-        // 6.
-        real O6 = c1o1;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - A and DIR_00M: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
-        //! different bulk viscosity).
-        //!
-        real factorA = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega);
-        real factorB = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute cumulants from central moments according to Eq. (20)-(23) in
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        ////////////////////////////////////////////////////////////
-        // 4.
-        real c_211 = m_211 - ((m_200 + c1o3) * m_011 + c2o1 * m_110 * m_101) * oneOverRho;
-        real c_121 = m_121 - ((m_020 + c1o3) * m_101 + c2o1 * m_110 * m_011) * oneOverRho;
-        real c_112 = m_112 - ((m_002 + c1o3) * m_110 + c2o1 * m_101 * m_011) * oneOverRho;
-
-        real c_220 = m_220 - (((m_200 * m_020 + c2o1 * m_110 * m_110) + c1o3 * (m_200 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho));
-        real c_202 = m_202 - (((m_200 * m_002 + c2o1 * m_101 * m_101) + c1o3 * (m_200 + m_002)) * oneOverRho - c1o9 * (drho * oneOverRho));
-        real c_022 = m_022 - (((m_002 * m_020 + c2o1 * m_011 * m_011) + c1o3 * (m_002 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho));
-        ////////////////////////////////////////////////////////////
-        // 5.
-        real c_122 =
-            m_122 - ((m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) +
-                    c1o3 * (m_120 + m_102)) *
-                    oneOverRho;
-        real c_212 =
-            m_212 - ((m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) +
-                    c1o3 * (m_210 + m_012)) *
-                    oneOverRho;
-        real c_221 =
-            m_221 - ((m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) +
-                    c1o3 * (m_021 + m_201)) *
-                    oneOverRho;
-        ////////////////////////////////////////////////////////////
-        // 6.
-        real c_222 = m_222 + ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
-                                c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
-                                c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
-                                oneOverRho +
-                            (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
-                                c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
-                                oneOverRho * oneOverRho -
-                                c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
-                            (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
-                                (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
-                                oneOverRho * oneOverRho * c2o3 +
-                                c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute linear combinations of second and third order cumulants
-        //!
-        ////////////////////////////////////////////////////////////
-        // 2.
-        real mxxPyyPzz = m_200 + m_020 + m_002;
-        real mxxMyy    = m_200 - m_020;
-        real mxxMzz    = m_200 - m_002;
-        ////////////////////////////////////////////////////////////
-        // 3.
-        real mxxyPyzz = m_210 + m_012;
-        real mxxyMyzz = m_210 - m_012;
-
-        real mxxzPyyz = m_201 + m_021;
-        real mxxzMyyz = m_201 - m_021;
-
-        real mxyyPxzz = m_120 + m_102;
-        real mxyyMxzz = m_120 - m_102;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // incl. correction
-        ////////////////////////////////////////////////////////////
-        //! - Compute velocity  gradients from second order cumulants according to Eq. (27)-(32)
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> Further explanations of the correction in viscosity in Appendix H of
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times
-        //! the gradients later.
-        //!
-        real Dxy  = -c3o1 * omega * m_110;
-        real Dxz  = -c3o1 * omega * m_101;
-        real Dyz  = -c3o1 * omega * m_011;
-        real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (m_000 - mxxPyyPzz);
-        real dyuy = dxux + omega * c3o2 * mxxMyy;
-        real dzuz = dxux + omega * c3o2 * mxxMzz;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        switch (turbulenceModel)
-        {
-        case TurbulenceModel::AMD:  //AMD is computed in separate kernel
-            break;
-        case TurbulenceModel::Smagorinsky:
-            turbulentViscosity[k_000] = calcTurbulentViscositySmagorinsky(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz);
-            break;
-        case TurbulenceModel::QR:
-            turbulentViscosity[k_000] = calcTurbulentViscosityQR(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz);
-            break;
-        default:
-            break;
-        }
-        ////////////////////////////////////////////////////////////
-        //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        mxxPyyPzz += OxxPyyPzz * (m_000 - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
-        mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-        mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        ////no correction
-        // mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);
-        // mxxMyy += -(-omega) * (-mxxMyy);
-        // mxxMzz += -(-omega) * (-mxxMzz);
-        //////////////////////////////////////////////////////////////////////////
-        m_011 += omega * (-m_011);
-        m_101 += omega * (-m_101);
-        m_110 += omega * (-m_110);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // relax
-        //////////////////////////////////////////////////////////////////////////
-        // incl. limiter
-        //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        real wadjust = Oxyz + (c1o1 - Oxyz) * abs(m_111) / (abs(m_111) + quadricLimitD);
-        m_111 += wadjust * (-m_111);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + quadricLimitP);
-        mxxyPyzz += wadjust * (-mxxyPyzz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + quadricLimitM);
-        mxxyMyzz += wadjust * (-mxxyMyzz);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + quadricLimitP);
-        mxxzPyyz += wadjust * (-mxxzPyyz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + quadricLimitM);
-        mxxzMyyz += wadjust * (-mxxzMyyz);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + quadricLimitP);
-        mxyyPxzz += wadjust * (-mxyyPxzz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + quadricLimitM);
-        mxyyMxzz += wadjust * (-mxyyMxzz);
-        //////////////////////////////////////////////////////////////////////////
-        // no limiter
-        // mfbbb += OxyyMxzz * (-mfbbb);
-        // mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
-        // mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
-        // mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
-        // mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
-        // mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
-        // mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute inverse linear combinations of second and third order cumulants
-        //!
-        m_200 = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-        m_020 = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
-        m_002 = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
-
-        m_210 = ( mxxyMyzz + mxxyPyzz) * c1o2;
-        m_012 = (-mxxyMyzz + mxxyPyzz) * c1o2;
-        m_201 = ( mxxzMyyz + mxxzPyyz) * c1o2;
-        m_021 = (-mxxzMyyz + mxxzPyyz) * c1o2;
-        m_120 = ( mxyyMxzz + mxyyPxzz) * c1o2;
-        m_102 = (-mxyyMxzz + mxyyPxzz) * c1o2;
-        //////////////////////////////////////////////////////////////////////////
-
-        //////////////////////////////////////////////////////////////////////////
-        // 4.
-        // no limiter
-        //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according
-        //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-        c_022 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_022);
-        c_202 = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_202);
-        c_220 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (c_220);
-        c_112 = -O4 * (c1o1 / omega - c1o2) * Dxy           * c1o3 * factorB + (c1o1 - O4) * (c_112);
-        c_121 = -O4 * (c1o1 / omega - c1o2) * Dxz           * c1o3 * factorB + (c1o1 - O4) * (c_121);
-        c_211 = -O4 * (c1o1 / omega - c1o2) * Dyz           * c1o3 * factorB + (c1o1 - O4) * (c_211);
-
-
-        //////////////////////////////////////////////////////////////////////////
-        // 5.
-        c_122 += O5 * (-c_122);
-        c_212 += O5 * (-c_212);
-        c_221 += O5 * (-c_221);
-
-        //////////////////////////////////////////////////////////////////////////
-        // 6.
-        c_222 += O6 * (-c_222);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in
-        //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-        //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-        //!
-
-        //////////////////////////////////////////////////////////////////////////
-        // 4.
-        m_211 = c_211 + c1o3 * ((c3o1 * m_200 + c1o1) * m_011 + c6o1 * m_110 * m_101) * oneOverRho;
-        m_121 = c_121 + c1o3 * ((c3o1 * m_020 + c1o1) * m_101 + c6o1 * m_110 * m_011) * oneOverRho;
-        m_112 = c_112 + c1o3 * ((c3o1 * m_002 + c1o1) * m_110 + c6o1 * m_101 * m_011) * oneOverRho;
-
-        m_220 =
-            c_220 + (((m_200 * m_020 + c2o1 * m_110 * m_110) * c9o1 + c3o1 * (m_200 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9;
-        m_202 =
-            c_202 + (((m_200 * m_002 + c2o1 * m_101 * m_101) * c9o1 + c3o1 * (m_200 + m_002)) * oneOverRho - (drho * oneOverRho)) * c1o9;
-        m_022 =
-            c_022 + (((m_002 * m_020 + c2o1 * m_011 * m_011) * c9o1 + c3o1 * (m_002 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9;
-
-        //////////////////////////////////////////////////////////////////////////
-        // 5.
-        m_122 = c_122 + c1o3 *
-                (c3o1 * (m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) +
-                (m_120 + m_102)) * oneOverRho;
-        m_212 = c_212 + c1o3 *
-                (c3o1 * (m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) +
-                (m_210 + m_012)) * oneOverRho;
-        m_221 = c_221 + c1o3 *
-                (c3o1 * (m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) +
-                (m_021 + m_201)) * oneOverRho;
-
-        //////////////////////////////////////////////////////////////////////////
-        // 6.
-        m_222 = c_222 - ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
-                        c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
-                        c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
-                        oneOverRho +
-                        (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
-                        c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
-                        oneOverRho * oneOverRho -
-                        c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
-                        (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
-                        (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
-                        oneOverRho * oneOverRho * c2o3 +
-                        c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! -  Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        m_100 = -m_100;
-        m_010 = -m_010;
-        m_001 = -m_001;
-
-        //Write to array here to distribute read/write
-        rho[k_000] = drho;
-        vx[k_000] = vvx;
-        vy[k_000] = vvy;
-        vz[k_000] = vvz;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (88)-(96) in <a
-        //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-        //! ]</b></a>
-        //!
-        ////////////////////////////////////////////////////////////////////////////////////
-        // X - Dir
-        backwardInverseChimeraWithK(m_000, m_100, m_200, vvx, vx2, c1o1, c1o1);
-        backwardChimera(            m_010, m_110, m_210, vvx, vx2);
-        backwardInverseChimeraWithK(m_020, m_120, m_220, vvx, vx2, c3o1, c1o3);
-        backwardChimera(            m_001, m_101, m_201, vvx, vx2);
-        backwardChimera(            m_011, m_111, m_211, vvx, vx2);
-        backwardChimera(            m_021, m_121, m_221, vvx, vx2);
-        backwardInverseChimeraWithK(m_002, m_102, m_202, vvx, vx2, c3o1, c1o3);
-        backwardChimera(            m_012, m_112, m_212, vvx, vx2);
-        backwardInverseChimeraWithK(m_022, m_122, m_222, vvx, vx2, c9o1, c1o9);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Y - Dir
-        backwardInverseChimeraWithK(m_000, m_010, m_020, vvy, vy2, c6o1, c1o6);
-        backwardChimera(            m_001, m_011, m_021, vvy, vy2);
-        backwardInverseChimeraWithK(m_002, m_012, m_022, vvy, vy2, c18o1, c1o18);
-        backwardInverseChimeraWithK(m_100, m_110, m_120, vvy, vy2, c3o2, c2o3);
-        backwardChimera(            m_101, m_111, m_121, vvy, vy2);
-        backwardInverseChimeraWithK(m_102, m_112, m_122, vvy, vy2, c9o2, c2o9);
-        backwardInverseChimeraWithK(m_200, m_210, m_220, vvy, vy2, c6o1, c1o6);
-        backwardChimera(            m_201, m_211, m_221, vvy, vy2);
-        backwardInverseChimeraWithK(m_202, m_212, m_222, vvy, vy2, c18o1, c1o18);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        // Z - Dir
-        backwardInverseChimeraWithK(m_000, m_001, m_002, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(m_010, m_011, m_012, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(m_020, m_021, m_022, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(m_100, m_101, m_102, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(m_110, m_111, m_112, vvz, vz2, c9o4, c4o9);
-        backwardInverseChimeraWithK(m_120, m_121, m_122, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(m_200, m_201, m_202, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(m_210, m_211, m_212, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(m_220, m_221, m_222, vvz, vz2, c36o1, c1o36);
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Write distributions: style of reading and writing the distributions from/to
-        //! stored arrays dependent on timestep is based on the esoteric twist algorithm
-        //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
-        //! DOI:10.3390/computation5020019 ]</b></a>
-        //!
-        (dist.f[DIR_P00])[k_000]    = f_M00;
-        (dist.f[DIR_M00])[k_M00]    = f_P00;
-        (dist.f[DIR_0P0])[k_000]    = f_0M0;
-        (dist.f[DIR_0M0])[k_0M0]    = f_0P0;
-        (dist.f[DIR_00P])[k_000]    = f_00M;
-        (dist.f[DIR_00M])[k_00M]    = f_00P;
-        (dist.f[DIR_PP0])[k_000]   = f_MM0;
-        (dist.f[DIR_MM0])[k_MM0]   = f_PP0;
-        (dist.f[DIR_PM0])[k_0M0]   = f_MP0;
-        (dist.f[DIR_MP0])[k_M00]   = f_PM0;
-        (dist.f[DIR_P0P])[k_000]   = f_M0M;
-        (dist.f[DIR_M0M])[k_M0M]   = f_P0P;
-        (dist.f[DIR_P0M])[k_00M]   = f_M0P;
-        (dist.f[DIR_M0P])[k_M00]   = f_P0M;
-        (dist.f[DIR_0PP])[k_000]   = f_0MM;
-        (dist.f[DIR_0MM])[k_0MM]   = f_0PP;
-        (dist.f[DIR_0PM])[k_00M]   = f_0MP;
-        (dist.f[DIR_0MP])[k_0M0]   = f_0PM;
-        (dist.f[DIR_000])[k_000] = f_000;
-        (dist.f[DIR_PPP])[k_000]  = f_MMM;
-        (dist.f[DIR_PMP])[k_0M0]  = f_MPM;
-        (dist.f[DIR_PPM])[k_00M]  = f_MMP;
-        (dist.f[DIR_PMM])[k_0MM]  = f_MPP;
-        (dist.f[DIR_MPP])[k_M00]  = f_PMM;
-        (dist.f[DIR_MMP])[k_MM0]  = f_PPM;
-        (dist.f[DIR_MPM])[k_M0M]  = f_PMP;
-        (dist.f[DIR_MMM])[k_MMM]  = f_PPP;
-    }
-}
-
-template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::AMD > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep);
-
-template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::Smagorinsky > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep);
-
-template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::QR > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
deleted file mode 100644
index 5ef37557399f263d25edf03b02b00f6a03c6e1cb..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef LB_Kernel_TURBULENT_VISCOSITY_CUMULANT_K17_COMP_CHIM_H
-#define LB_Kernel_TURBULENT_VISCOSITY_CUMULANT_K17_COMP_CHIM_H
-
-#include <DataTypes.h>
-#include <curand.h>
-
-template< TurbulenceModel turbulenceModel > __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
-	real omega_in,
-	uint* typeOfGridNode,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
-	real* rho,
-	real* vx,
-    real* vy,
-    real* vz,
-	real* turbulentViscosity,
-	real SGSconstant,
-	unsigned long size_Mat,
-	int level,
-	bool bodyForce,
-	real* forces,
-	real* bodyForceX,
-	real* bodyForceY,
-	real* bodyForceZ,
-	real* quadricLimiters,
-	bool isEvenTimestep);
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.cpp
deleted file mode 100644
index f3615a89994f0ca1fafdc1eda905d3c3b615d478..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "TurbulentViscosityFluidFlowCompStrategy.h"
-
-#include "Parameter/Parameter.h"
-
-std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy> TurbulentViscosityFluidFlowCompStrategy::getInstance()
-{
-    static std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy> uniqueInstance;
-	if (!uniqueInstance)
-        uniqueInstance = std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy>(new TurbulentViscosityFluidFlowCompStrategy());
-	return uniqueInstance;
-}
-
-bool TurbulentViscosityFluidFlowCompStrategy::checkParameter(std::shared_ptr<Parameter> para)
-{
-	if (!para->getUseTurbulentViscosity())
-		return false;
-	else if (!para->getCompOn())
-		return false;
-	else
-		return true;
-}
-
-TurbulentViscosityFluidFlowCompStrategy::TurbulentViscosityFluidFlowCompStrategy() {}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h
deleted file mode 100644
index 95eff17777f7f0d1c3e05fe1b0d93892a88646a4..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef AMD_FLUID_FLOW_COMP_STRATEGY_H
-#define AMD_FLUID_FLOW_COMP_STRATEGY_H
-
-#include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h"
-
-
-class TurbulentViscosityFluidFlowCompStrategy : public CheckParameterStrategy
-{
-public:
-    static std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy> getInstance();
-
-	bool checkParameter(std::shared_ptr<Parameter> para);
-
-private:
-    TurbulentViscosityFluidFlowCompStrategy();
-
-};
-#endif 
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
index cfcb70cd2bd6f3cc8ec4349650c44b7d3b0619fc..2b8a7d61e8966e2ed00022986311ae68ac0ca6d6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK15Comp> WaleCumulantK15Comp::getNewInstance(std::sh
 
 void WaleCumulantK15Comp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	int Grid = (size_Mat / numberOfThreads) + 1;
@@ -28,22 +28,23 @@ void WaleCumulantK15Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleCumulantK15Comp << < grid, threads >> >(	para->getParD(level)->omega,
-																para->getParD(level)->typeOfGridNode,
-																para->getParD(level)->neighborX,
-																para->getParD(level)->neighborY,
-																para->getParD(level)->neighborZ,
-																para->getParD(level)->neighborInverse,
-																para->getParD(level)->velocityX,
-																para->getParD(level)->velocityY,
-																para->getParD(level)->velocityZ,
-																para->getParD(level)->distributions.f[0],
-																para->getParD(level)->turbViscosity,
-																para->getParD(level)->numberOfNodes,
-																level,
-																para->getTimestepOfCoarseLevel(),
-																para->getForcesDev(),
-																para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleCumulantK15Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getTimestepOfCoarseLevel(),
+		para->getForcesDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleCumulantK15Comp execution failed");
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
index 3da25060e6c82ea685a1659fecc8cf66eeaf44c4..a7018d1246c0832753df144ffbf2625b55f5508e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
@@ -46,63 +46,63 @@ __global__ void LB_Kernel_WaleCumulantK15Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -136,33 +136,33 @@ __global__ void LB_Kernel_WaleCumulantK15Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
 			real mfaaa = (D.f[DIR_MMM])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
index 15b808279a4c9dc771531f118cb369b7c5380a84..49ee20b44f37b01cd9bc837024a47c1428c00a18 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleBySoniMalavCumulantK15Comp> WaleBySoniMalavCumulantK15Comp::
 
 void WaleBySoniMalavCumulantK15Comp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	//int Grid = size_Array / numberOfThreads;
@@ -32,21 +32,22 @@ void WaleBySoniMalavCumulantK15Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleBySoniMalavCumulantK15Comp << < grid, threads >> >(	para->getParD(level)->omega,
-																			para->getParD(level)->typeOfGridNode,
-																			para->getParD(level)->neighborX,
-																			para->getParD(level)->neighborY,
-																			para->getParD(level)->neighborZ,
-																			para->getParD(level)->neighborInverse,
-																			para->getParD(level)->velocityX,
-																			para->getParD(level)->velocityY,
-																			para->getParD(level)->velocityZ,
-																			para->getParD(level)->distributions.f[0],
-																			para->getParD(level)->turbViscosity,
-																			para->getParD(level)->numberOfNodes,
-																			level,
-																			para->getForcesDev(),
-																			para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleBySoniMalavCumulantK15Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleBySoniMalavCumulantK15Comp execution failed");
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
index 511219c352c4d156428565f718191a70b9cc6c32..6258c72c36cafa27b06b2934db42a5813ed74f99 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
@@ -45,63 +45,63 @@ __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -115,33 +115,33 @@ __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
 			real mfaaa = (D.f[DIR_MMM])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
index 5eeea51301c666cf17546c85a444413111bebf2c..c9c16e2d2d2259656248948f3f10977c8f18fd24 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK17Comp> WaleCumulantK17Comp::getNewInstance(std::sh
 
 void WaleCumulantK17Comp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	//int Grid = size_Array / numberOfThreads;
@@ -32,23 +32,24 @@ void WaleCumulantK17Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleCumulantK17Comp <<< grid, threads >>>(para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->neighborInverse,
-														para->getParD(level)->velocityX,
-														para->getParD(level)->velocityY,
-														para->getParD(level)->velocityZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->turbViscosity,
-														para->getParD(level)->numberOfNodes,
-														level,
-														para->getTimestepOfCoarseLevel(),
-														para->getForcesDev(),
-                                                        para->getQuadricLimitersDev(),
-														para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleCumulantK17Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getTimestepOfCoarseLevel(),
+		para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleCumulantK17Comp execution failed");
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
index 8aaa13ab1d868e15ea5707d1566ba653b44c645d..e3161e0d26efe8993bb4b6c34bda32bf15af5d3d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
@@ -47,63 +47,63 @@ __global__ void LB_Kernel_WaleCumulantK17Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -137,33 +137,33 @@ __global__ void LB_Kernel_WaleCumulantK17Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
 			real mfaaa = (D.f[DIR_MMM])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
index 98dca58f522bf02ce66328819e42c717f0ceef28..b3cdd494c02c6649d60818b6b264b8db8b79d426 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK17DebugComp> WaleCumulantK17DebugComp::getNewInstan
 
 void WaleCumulantK17DebugComp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	//int Grid = size_Array / numberOfThreads;
@@ -32,34 +32,34 @@ void WaleCumulantK17DebugComp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleCumulantK17DebugComp << < grid, threads >> >(
-																		para->getParD(level)->omega,
-																		para->getParD(level)->typeOfGridNode,
-																		para->getParD(level)->neighborX,
-																		para->getParD(level)->neighborY,
-																		para->getParD(level)->neighborZ,
-																		para->getParD(level)->neighborInverse,
-																		para->getParD(level)->velocityX,
-																		para->getParD(level)->velocityY,
-																		para->getParD(level)->velocityZ,
-																		para->getParD(level)->distributions.f[0],
-																		para->getParD(level)->turbViscosity,
-																		para->getParD(level)->gSij,
-																		para->getParD(level)->gSDij,
-																		para->getParD(level)->gDxvx,
-																		para->getParD(level)->gDyvx,
-																		para->getParD(level)->gDzvx,
-																		para->getParD(level)->gDxvy,
-																		para->getParD(level)->gDyvy,
-																		para->getParD(level)->gDzvy,
-																		para->getParD(level)->gDxvz,
-																		para->getParD(level)->gDyvz,
-																		para->getParD(level)->gDzvz,
-																		para->getParD(level)->numberOfNodes,
-																		level,
-																		para->getForcesDev(),
-                                                                        para->getQuadricLimitersDev(),
-																		para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleCumulantK17DebugComp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->gSij,
+		para->getParD(level)->gSDij,
+		para->getParD(level)->gDxvx,
+		para->getParD(level)->gDyvx,
+		para->getParD(level)->gDzvx,
+		para->getParD(level)->gDxvy,
+		para->getParD(level)->gDyvy,
+		para->getParD(level)->gDzvy,
+		para->getParD(level)->gDxvz,
+		para->getParD(level)->gDyvz,
+		para->getParD(level)->gDzvz,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleCumulantK17DebugComp execution failed");
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
index a1feba477a6555ea728311a6e99d5302652813ff..63f4ecc8716fcd606fb6a75709408b0885d781e9 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
@@ -57,63 +57,63 @@ __global__ void LB_Kernel_WaleCumulantK17DebugComp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM]= &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM]= &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM]= &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM]= &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM]= &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM]= &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM]= &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM]= &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h
deleted file mode 100644
index f7822d63fa0efd34b27773dffdeebddf521a8792..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef CHIMERA_TRANSFORMATION_H
-#define CHIMERA_TRANSFORMATION_H
-
-#include <lbm/constants/NumericConstants.h>
-
-using namespace vf::lbm::constant;
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief forward chimera transformation \ref forwardInverseChimeraWithK
-//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> Modified for lower round-off errors.
-inline __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
-{
-    real m2 = mfa + mfc;
-    real m1 = mfc - mfa;
-    real m0 = m2 + mfb;
-    mfa     = m0;
-    m0 *= Kinverse;
-    m0 += c1o1;
-    mfb = (m1 * Kinverse - m0 * vv) * K;
-    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief backward chimera transformation \ref backwardInverseChimeraWithK
-//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> Modified for lower round-off errors.
-inline __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
-{
-    real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
-    real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;
-    mfc     = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
-    mfa     = m0;
-    mfb     = m1;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief forward chimera transformation \ref forwardChimera
-//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off
-//! errors.
-inline __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
-{
-    real m1 = (mfa + mfc) + mfb;
-    real m2 = mfc - mfa;
-    mfc     = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
-    mfb     = m2 - vv * m1;
-    mfa     = m1;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief backward chimera transformation \ref backwardChimera
-//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off
-//! errors.
-inline __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
-{
-    real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-    real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
-    mfc     = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
-    mfb     = mb;
-    mfa     = ma;
-}
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
index 7c477c539dc3526389dc22563b50501e778a63f3..240a6ffbace64147aa67224fe72c946761fdc452 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
@@ -2,8 +2,7 @@
 
 #include <cuda_runtime.h>
 
-
-#include <lbm/constants/NumericConstants.h>
+#include "lbm/constants/NumericConstants.h"
 #include "lbm/constants/D3Q27.h"
 using namespace vf::lbm::dir;
 
@@ -80,10 +79,4 @@ __device__ void DistributionWrapper::write()
     (distribution_references.f[DIR_000])[k]   = distribution.f[vf::lbm::dir::ZZZ];
 }
 
-__device__ bool isValidFluidNode(uint nodeType)
-{
-    return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2);
-}
-
-
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
index 1009ecfa92f31e821d825ad72ba681bc3ae96d1b..599f3f46668c07da49725770177d77239f8ef9df 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
@@ -37,76 +37,13 @@
 
 #include "lbm/KernelParameter.h"
 #include "lbm/constants/D3Q27.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::dir;
 
 namespace vf::gpu
 {
 
-__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const uint numberOfLBnodes, const bool isEvenTimestep)
-{
-    if (isEvenTimestep)
-    {
-        dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
-        dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes];
-        dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes];
-        dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
-        dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
-        dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes];
-        dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes];
-        dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
-        dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
-        dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
-        dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
-        dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes];
-        dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes];
-        dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes];
-        dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes];
-        dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes];
-        dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes];
-        dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes];
-        dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes];
-        dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes];
-        dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes];
-        dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes];
-        dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes];
-        dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes];
-        dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes];
-        dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes];
-        dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes];
-    }
-    else
-    {
-         dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes];
-         dist.f[DIR_P00] = &distributionArray[DIR_M00 * numberOfLBnodes];
-         dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
-         dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
-         dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes];
-         dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes];
-         dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
-         dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
-         dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
-         dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
-         dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes];
-         dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes];
-         dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes];
-         dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes];
-         dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes];
-         dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes];
-         dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes];
-         dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes];
-         dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
-         dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes];
-         dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes];
-         dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes];
-         dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes];
-         dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes];
-         dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes];
-         dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes];
-         dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes];
-    }
-}
-
 /**
 *  Getting references to the 27 directions.
 *  @params distributions 1D real* array containing all data (number of elements = 27 * matrix_size)
@@ -114,7 +51,7 @@ __inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &
 *  @params isEvenTimestep: stored data dependent on timestep is based on the esoteric twist algorithm
 *  @return a data struct containing the addresses to the 27 directions within the 1D distribution array
 */
-__inline__ __device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, unsigned int numberOfLBnodes, bool isEvenTimestep){
+__inline__ __device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, const unsigned long long numberOfLBnodes, const bool isEvenTimestep){
     DistributionReferences27 distribution_references;
     getPointersToDistributions(distribution_references, distributions, numberOfLBnodes, isEvenTimestep);
     return distribution_references;
@@ -157,20 +94,6 @@ struct DistributionWrapper
     const uint kbsw;
 };
 
-__inline__ __device__ unsigned int getNodeIndex()
-{
-    const unsigned x = threadIdx.x;
-    const unsigned y = blockIdx.x;
-    const unsigned z = blockIdx.y;
-
-    const unsigned nx = blockDim.x;
-    const unsigned ny = gridDim.x;
-
-    return nx * (ny * z + y) + x;
-}
-
-__device__ bool isValidFluidNode(uint nodeType);
-
 }
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
index 53ec240f096080097416e640fdd095c3812fb34c..5a2d8c9a426e5cb23ca75f91aaf6fbff75cba72b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
@@ -8,11 +8,9 @@
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.h"
-#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.h"
-#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h"
-#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h"
+#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.h"
@@ -49,9 +47,6 @@
 #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.h"
 #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.h"
 
-//turbulent viscosity kernel
-#include "Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h"
-
 //strategies
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/FluidFlowIncompStrategy.h"
@@ -61,7 +56,6 @@
 #include "Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADMod7IncompStrategy.h"
 #include "Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/PMFluidFlowCompStrategy.h"
 #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/WaleFluidFlowCompStrategy.h"
-#include "Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h"
 
 std::vector<std::shared_ptr<Kernel>> KernelFactoryImp::makeKernels(std::shared_ptr<Parameter> para)
 {
@@ -118,9 +112,6 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
     } else if (kernel == "CumulantCompSP27") {
         newKernel     = CumulantCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17Comp") {
-        newKernel     = CumulantK17Comp::getNewInstance(para, level);
-        checkStrategy = FluidFlowCompStrategy::getInstance();
     } else if (kernel == "CumulantK15Unified") {
         newKernel     = std::make_shared<vf::gpu::CumulantK15Unified>(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
@@ -133,12 +124,26 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
     } else if (kernel == "CumulantK17CompChim") {
         newKernel     = CumulantK17CompChim::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17CompChimStream") {
-        newKernel     = CumulantK17CompChimStream::getNewInstance(para, level);
-        checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17CompChimRedesigned") {
-        newKernel     = CumulantK17CompChimRedesigned::getNewInstance(para, level);
-        checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == "CumulantK17"){               
+        switch(para->getTurbulenceModel())                                          
+        {   
+            case TurbulenceModel::AMD:
+                newKernel = CumulantK17<TurbulenceModel::AMD>::getNewInstance(para, level);   
+                break;
+            case TurbulenceModel::Smagorinsky:
+                newKernel = CumulantK17<TurbulenceModel::Smagorinsky>::getNewInstance(para, level);  
+                break;
+            case TurbulenceModel::QR:
+                newKernel = CumulantK17<TurbulenceModel::QR>::getNewInstance(para, level);  
+                break;
+            case TurbulenceModel::None:
+                newKernel = CumulantK17<TurbulenceModel::None>::getNewInstance(para, level); 
+                break;
+            default:
+                throw std::runtime_error("Unknown turbulence model!");
+            break;                                                              
+        }                                                                       
+        checkStrategy = FluidFlowCompStrategy::getInstance();       
     } else if (kernel == "CumulantAll4CompSP27") {
         newKernel     = CumulantAll4CompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
@@ -197,35 +202,9 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
         newKernel     = WaleBySoniMalavCumulantK15Comp::getNewInstance(para, level);// ||
         checkStrategy = WaleFluidFlowCompStrategy::getInstance();               // wale model
     }                                                                          //===============
-    else if (kernel == "TurbulentViscosityCumulantK17CompChim"){               // compressible with turbulent viscosity
-        switch(para->getTurbulenceModel())                                     //       ||          
-        {                                                                      //       \/      //
-            case TurbulenceModel::AMD:
-                newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::AMD>::getNewInstance(para, level);   
-                break;
-            case TurbulenceModel::Smagorinsky:
-                newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::Smagorinsky>::getNewInstance(para, level);  
-                break;
-            case TurbulenceModel::QR:
-                newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::QR>::getNewInstance(para, level);  
-                break;
-            case TurbulenceModel::None:
-                throw std::runtime_error("TurbulentViscosityCumulantK17CompChim currently not implemented for TurbulenceModel::None!");
-                break;
-            default:
-                throw std::runtime_error("Unknown turbulence model!");
-            break;                                                              
-        }                                                                       
-        checkStrategy = TurbulentViscosityFluidFlowCompStrategy::getInstance(); 
-                                                                                //     /\      //
-                                                                                //     ||    
-                                                                                // compressible with turbulent viscosity  
-                                                                                //===============         
-    }
     else {
         throw std::runtime_error("KernelFactory does not know the KernelType.");
     }
-
     newKernel->setCheckParameterStrategy(checkStrategy);
     para->setKernelNeedsFluidNodeIndicesToRun(newKernel->getKernelUsesFluidNodeIndices());
     return newKernel;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h
deleted file mode 100644
index 13ce5d88aaa7cb49225fa914c1f59c2de05802f5..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h
+++ /dev/null
@@ -1,148 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file scalingHelperFunctions.h
-//! \ingroup GPU/Kernel/Utilities
-//! \author Martin Schoenherr, Anna Wellmann
-//=======================================================================================
-
-#ifndef SCALING_HELPER_FUNCTIONS_H
-#define SCALING_HELPER_FUNCTIONS_H
-
-#include "LBM/LB.h" 
-#include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
-
-using namespace vf::lbm::constant;
-using namespace vf::lbm::dir;
-
-__device__ __inline__ void calculateMomentsOnSourceNodes(
-    Distributions27& dist,
-    real& omega,
-    unsigned int& k_000,
-    unsigned int& k_M00,
-    unsigned int& k_0M0,
-    unsigned int& k_00M,
-    unsigned int& k_MM0,
-    unsigned int& k_M0M,
-    unsigned int& k_0MM,
-    unsigned int& k_MMM,
-    real& drho,
-    real& velocityX,
-    real& velocityY,
-    real& velocityZ,
-    real& kxyFromfcNEQ,
-    real& kyzFromfcNEQ,
-    real& kxzFromfcNEQ,
-    real& kxxMyyFromfcNEQ,
-    real& kxxMzzFromfcNEQ
-    ){
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Set local distributions (f's) on source nodes:
-        //!
-        real f_000 = (dist.f[DIR_000])[k_000]; 
-        real f_P00 = (dist.f[DIR_P00])[k_000];
-        real f_M00 = (dist.f[DIR_M00])[k_M00];
-        real f_0P0 = (dist.f[DIR_0P0])[k_000];
-        real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
-        real f_00P = (dist.f[DIR_00P])[k_000];
-        real f_00M = (dist.f[DIR_00M])[k_00M];
-        real f_PP0 = (dist.f[DIR_PP0])[k_000];
-        real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
-        real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
-        real f_MP0 = (dist.f[DIR_MP0])[k_M00];
-        real f_P0P = (dist.f[DIR_P0P])[k_000];
-        real f_M0M = (dist.f[DIR_M0M])[k_M0M];
-        real f_P0M = (dist.f[DIR_P0M])[k_00M];
-        real f_M0P = (dist.f[DIR_M0P])[k_M00];
-        real f_0PP = (dist.f[DIR_0PP])[k_000];
-        real f_0MM = (dist.f[DIR_0MM])[k_0MM];
-        real f_0PM = (dist.f[DIR_0PM])[k_00M];
-        real f_0MP = (dist.f[DIR_0MP])[k_0M0];
-        real f_PPP = (dist.f[DIR_PPP])[k_000];
-        real f_MPP = (dist.f[DIR_MPP])[k_M00];
-        real f_PMP = (dist.f[DIR_PMP])[k_0M0];
-        real f_MMP = (dist.f[DIR_MMP])[k_MM0];
-        real f_PPM = (dist.f[DIR_PPM])[k_00M];
-        real f_MPM = (dist.f[DIR_MPM])[k_M0M];
-        real f_PMM = (dist.f[DIR_PMM])[k_0MM];
-        real f_MMM = (dist.f[DIR_MMM])[k_MMM];
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
-                (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
-                 ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
-                 ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
-                   f_000;
-
-        real oneOverRho = c1o1 / (c1o1 + drho);
-
-        velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
-                     (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
-                    oneOverRho;
-        velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
-                     (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
-                    oneOverRho;
-        velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
-                     (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
-                    oneOverRho;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Calculate second order moments for interpolation
-        //!
-        // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction
-        kxyFromfcNEQ =
-            -c3o1 * omega *
-            ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) /
-                 (c1o1 + drho) -
-             ((velocityX * velocityY)));
-        kyzFromfcNEQ =
-            -c3o1 * omega *
-            ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) /
-                 (c1o1 + drho) -
-             ((velocityY * velocityZ)));
-        kxzFromfcNEQ =
-            -c3o1 * omega *
-            ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) /
-                 (c1o1 + drho) -
-             ((velocityX * velocityZ)));
-        kxxMyyFromfcNEQ =
-            -c3o2 * omega *
-            ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) / (c1o1 + drho) -
-             ((velocityX * velocityX - velocityY * velocityY)));
-        kxxMzzFromfcNEQ =
-            -c3o2 * omega *
-            ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) / (c1o1 + drho) -
-             ((velocityX * velocityX - velocityZ * velocityZ)));
-}
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
index 9ca813ac4987af618491422acb60207b7fee543c..5a36daecd5a82fc8a052bf51fedc1cb35b94a960 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
@@ -90,10 +90,10 @@ void ADKernelManager::initAD(const int level) const
 ////////////////////////////////////////////////////////////////////////////////
 void ADKernelManager::setInitialNodeValuesAD(const int level, SPtr<CudaMemoryManager> cudaMemoryManager) const
 {
-    for (uint j = 1; j <= para->getParH(level)->numberOfNodes; j++) {
-        const real coordX = para->getParH(level)->coordinateX[j];
-        const real coordY = para->getParH(level)->coordinateY[j];
-        const real coordZ = para->getParH(level)->coordinateZ[j];
+    for (size_t index = 1; index <= para->getParH(level)->numberOfNodes; index++) {
+        const real coordX = para->getParH(level)->coordinateX[index];
+        const real coordY = para->getParH(level)->coordinateY[index];
+        const real coordZ = para->getParH(level)->coordinateZ[index];
 
         real concentration;
 
@@ -104,7 +104,7 @@ void ADKernelManager::setInitialNodeValuesAD(const int level, SPtr<CudaMemoryMan
             concentration = real(0.0);
         }
 
-        para->getParH(level)->concentration[j] = concentration;
+        para->getParH(level)->concentration[index] = concentration;
     }
 
     cudaMemoryManager->cudaCopyConcentrationHostToDevice(level);
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
index cc945ea225a28c58dca4ceefdb80fffb76228b21..e8fc3f318c920be36be7861a28659124a7b1e977 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
@@ -38,6 +38,7 @@
 
 #include "BCKernelManager.h"
 #include "Factories/BoundaryConditionFactory.h"
+#include "GridGenerator/TransientBCSetter/TransientBCSetter.h"
 #include "Calculation/Cp.h"
 #include "Calculation/DragLift.h"
 #include "GPU/GPU_Interface.h"
@@ -51,6 +52,7 @@ BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFac
     this->pressureBoundaryConditionPre  = bcFactory->getPressureBoundaryConditionPre();
     this->geometryBoundaryConditionPost = bcFactory->getGeometryBoundaryConditionPost();
     this->stressBoundaryConditionPost   = bcFactory->getStressBoundaryConditionPost();
+    this->precursorBoundaryConditionPost = bcFactory->getPrecursorBoundaryConditionPost();
 
     checkBoundaryCondition(this->velocityBoundaryConditionPost, this->para->getParD(0)->velocityBC,
                            "velocityBoundaryConditionPost");
@@ -64,6 +66,8 @@ BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFac
                            "geometryBoundaryConditionPost");
     checkBoundaryCondition(this->stressBoundaryConditionPost, this->para->getParD(0)->stressBC,
                            "stressBoundaryConditionPost");
+    checkBoundaryCondition(this->precursorBoundaryConditionPost, this->para->getParD(0)->precursorBC,
+                           "precursorBoundaryConditionPost");
 }
 
 void BCKernelManager::runVelocityBCKernelPre(const int level) const
@@ -387,3 +391,41 @@ void BCKernelManager::runNoSlipBCKernelPost(const int level) const{
         noSlipBoundaryConditionPost(para->getParD(level).get(), &(para->getParD(level)->noSlipBC));
     }
 }
+
+void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager)
+{   // Runs the precursor BC kernel on `level`; before that, refills the precursor buffers once t_level reaches currentTime.
+    if(para->getParH(level)->precursorBC.numberOfBCnodes == 0) return; // nothing to do without precursor BC nodes on this level
+
+    uint t_level = para->getTimeStep(level, t, true); // presumably t expressed in time steps of this level - TODO confirm meaning of the third argument
+    // NOTE(review): the three values below are uint; if nPrecursorReads is unsigned and < 2 the "-2" wraps around - confirm its initial value.
+    uint lastTime =    (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into last arrays
+    uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into current arrays
+    uint nextTime =     para->getParD(level)->precursorBC.nPrecursorReads   *para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into next arrays
+    // Once t_level has reached currentTime, shift the window by one read and fetch new data into "next".
+    if(t_level>=currentTime)
+    {
+        // advance the three time stamps by one read interval
+        lastTime = currentTime;
+        currentTime = nextTime;
+        nextTime += para->getParD(level)->precursorBC.timeStepsBetweenReads;
+
+        // rotate the device-side buffers: last <- current <- next, and the old "last" buffer becomes the new "next"
+        real* tmp = para->getParD(level)->precursorBC.last;
+        para->getParD(level)->precursorBC.last = para->getParD(level)->precursorBC.current;
+        para->getParD(level)->precursorBC.current = para->getParD(level)->precursorBC.next;
+        para->getParD(level)->precursorBC.next = tmp;
+        // nextTime scaled by 2^-level and the time ratio; presumably the time in the units the readers expect - TODO confirm
+        real loadTime = nextTime*pow(2,-level)*para->getTimeRatio();
+        // every reader writes into the host-side "next" buffer (the host pointers are not rotated, only the device ones)
+        for(auto reader : para->getParH(level)->transientBCInputFileReader)
+        {   // NOTE(review): `auto` copies each element of the container; confirm whether a reference was intended
+            reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, loadTime);
+        }
+        cudaMemoryManager->cudaCopyPrecursorData(level); // presumably uploads the freshly read host data to the device - TODO confirm
+        para->getParD(level)->precursorBC.nPrecursorReads++;
+        para->getParH(level)->precursorBC.nPrecursorReads++;  // host and device read counters are advanced together
+    }
+    // tRatio: steps elapsed since lastTime, divided by the read interval; handed to the kernel together with the velocity ratio
+    real tRatio = real(t_level-lastTime)/para->getParD(level)->precursorBC.timeStepsBetweenReads;
+    precursorBoundaryConditionPost(para->getParD(level).get(), &para->getParD(level)->precursorBC, tRatio, para->getVelocityRatio());
+}
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h
index 423a9cc9056281a3a2a135ae32fa26cc47f93967..339100e6b5307e8e60f8d0846560bf89c6eea1a1 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h
@@ -41,6 +41,7 @@
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
 
+
 class CudaMemoryManager;
 class BoundaryConditionFactory;
 class Parameter;
@@ -48,6 +49,7 @@ struct LBMSimulationParameter;
 
 using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>;
 using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>;
+using precursorBoundaryCondition = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real tRatio, real velocityRatio)>;
 
 //! \class BCKernelManager
 //! \brief manage the cuda kernel calls to boundary conditions
@@ -84,7 +86,10 @@ public:
     //! \brief calls the device function of the pressure boundary condition (post-collision)
     void runPressureBCKernelPost(const int level) const;
 
-    //! \brief calls the device function of the outflow boundary condition (pre-collision)
+	//! \brief calls the device function of the precursor boundary condition (post-collision)
+	void runPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager);
+
+    //! \brief calls the device function of the outflow boundary condition (pre-collision)
     void runOutflowBCKernelPre(const int level) const;
 
     //! \brief calls the device function of the stress wall model (post-collision)
@@ -96,13 +101,16 @@ private:
     //! \param boundaryCondition: a kernel function for the boundary condition
     //! \param bcStruct: a struct containing the grid nodes which are part of the boundary condition
     //! \param bcName: the name of the checked boundary condition
-    template <typename bcFunction>
-    void checkBoundaryCondition(const bcFunction &boundaryCondition, const QforBoundaryConditions &bcStruct, const std::string &bcName)
+    template <typename bcFunction, typename QforBC>
+    void checkBoundaryCondition(const bcFunction &boundaryCondition, const QforBC &bcStruct, const std::string &bcName)
     {
         if (!boundaryCondition && bcStruct.numberOfBCnodes > 0)
             throw std::runtime_error("The boundary condition " + bcName + " was not set!");
     }
 
+    void runDistributionPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager);
+    void runVelocityPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager);
+
     SPtr<Parameter> para;
 
     boundaryCondition velocityBoundaryConditionPost = nullptr;
@@ -111,5 +119,6 @@ private:
     boundaryCondition pressureBoundaryConditionPre = nullptr;
     boundaryCondition geometryBoundaryConditionPost = nullptr;
     boundaryConditionWithParameter stressBoundaryConditionPost = nullptr;
+    precursorBoundaryCondition precursorBoundaryConditionPost = nullptr;
 };
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp
index d55fa51bd8a225dd4e89e684bc81cd56f3f450c0..a0e02112e821eedcfeb013d3465529f668309529 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp
@@ -53,3 +53,9 @@ TEST_F(BCKernelManagerTest_BCsNotSpecified, stressBoundaryConditionPost_NotSpeci
     para->getParD(0)->stressBC.numberOfBCnodes = 1;
     EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error);
 }
+
+TEST_F(BCKernelManagerTest_BCsNotSpecified, precursorBoundaryConditionPost_NotSpecified)
+{   // The BCKernelManager constructor must throw when precursor BC nodes exist but no precursor BC function is available.
+    para->getParD(0)->precursorBC.numberOfBCnodes = 1; // mark level 0 as having one precursor BC node
+    EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); // bcFactory presumably has no BCs set (fixture name) - TODO confirm
+}
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
index c3129e31a9c750a012a26d58961062eaf3f40add..2b6a266c0d4e5f523091fa4982eee5d83b2ec675 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
@@ -59,8 +59,9 @@ GridScalingKernelManager::GridScalingKernelManager(SPtr<Parameter> parameter, Gr
         VF_LOG_TRACE("Function for scalingCoarseToFine is nullptr");
 }
 
-void GridScalingKernelManager::runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, int streamIndex) const{
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+void GridScalingKernelManager::runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, CudaStreamIndex streamIndex) const
+{
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
 
     this->scalingFineToCoarse(para->getParD(level).get(), para->getParD(level+1).get(), icellFC, offFC, stream);
 
@@ -327,9 +328,9 @@ void GridScalingKernelManager::runFineToCoarseKernelAD(const int level) const
     }
 }
 
-void GridScalingKernelManager::runCoarseToFineKernelLB(const int level, InterpolationCellCF* icellCF, OffCF &offCF, int streamIndex) const
+void GridScalingKernelManager::runCoarseToFineKernelLB(const int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex) const
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
     this->scalingCoarseToFine(para->getParD(level).get(), para->getParD(level+1).get(), icellCF, offCF, stream);
 
     // ScaleCF_comp_D3Q27F3(
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h
index 85cdd88ec2e3a6622108026ce8f53c5c770f8afe..3c78ee7f9db254556e8ec6dbbafaf51cd995f10b 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h
@@ -44,6 +44,7 @@
 class Parameter;
 class CudaMemoryManager;
 class GridScalingFactory;
+enum class CudaStreamIndex;
 struct LBMSimulationParameter;
 struct CUstream_st;
 
@@ -62,14 +63,14 @@ public:
     //! \throws std::runtime_error when the user forgets to specify a scaling function
     GridScalingKernelManager(SPtr<Parameter> parameter, GridScalingFactory *gridScalingFactory);
 
-    //! \brief calls the device function of the fine to coarse grid interpolation kernel
-    void runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, int streamIndex) const;
+    //! \brief calls the device function of the fine to coarse grid interpolation kernel
+    void runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, CudaStreamIndex streamIndex) const;
 
     //! \brief calls the device function of the fine to coarse grid interpolation kernel (advection diffusion)
     void runFineToCoarseKernelAD(const int level) const;
 
     //! \brief calls the device function of the coarse to fine grid interpolation kernel
-    void runCoarseToFineKernelLB(const int level, InterpolationCellCF *icellCF, OffCF &offCF, int streamIndex) const;
+    void runCoarseToFineKernelLB(const int level, InterpolationCellCF *icellCF, OffCF &offCF, CudaStreamIndex streamIndex) const;
 
     //! \brief calls the device function of the coarse to fine grid interpolation kernel (advection diffusion)
     void runCoarseToFineKernelAD(const int level) const;
diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h
new file mode 100644
index 0000000000000000000000000000000000000000..225f615ec3ad2d8ef11ec295f8d9e8a4166d99fe
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h
@@ -0,0 +1,108 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ChimeraTransformation.h
+//! \ingroup LBM/GPUHelperFunctions
+//! \author Martin Schoenherr, Anna Wellmann, Soeren Peters
+//=======================================================================================
+#ifndef CHIMERA_TRANSFORMATION_H
+#define CHIMERA_TRANSFORMATION_H
+
+#include "LBM/LB.h"
+
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
+namespace vf::gpu
+{
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief forward chimera transformation \ref forwardInverseChimeraWithK
+//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> Modified for lower round-off errors. mfa, mfb and mfc are read first and then overwritten in place.
+__inline__ __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
+{   // NOTE(review): Kinverse is presumably 1 / K and v2 presumably vv * vv; both are supplied by the caller - confirm.
+    real m2 = mfa + mfc; // sum of the two outer inputs
+    real m1 = mfc - mfa; // difference of the two outer inputs
+    real m0 = m2 + mfb;  // sum of all three inputs
+    mfa = m0;            // first output: the plain sum
+    m0 *= Kinverse;
+    m0 += c1o1;          // m0 now holds sum * Kinverse + c1o1; the outputs below use this value, not the new mfa
+    mfb = (m1 * Kinverse - m0 * vv) * K;
+    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief backward chimera transformation \ref backwardInverseChimeraWithK
+//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> Modified for lower round-off errors. mfa, mfb and mfc are read first and then overwritten in place.
+__inline__ __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
+{   // m0 and m1 buffer the new mfa and mfb so that all three formulas still see the unmodified inputs.
+    real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K; // new mfa
+    real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K; // new mfb
+    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K; // last read of the original mfa and mfb
+    mfa = m0;
+    mfb = m1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief forward chimera transformation \ref forwardChimera
+//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off
+//! errors. mfa, mfb and mfc are read first and then overwritten in place.
+__inline__ __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
+{   // Note: unlike forwardInverseChimeraWithK, here m1 is the total sum and m2 the difference. v2 is presumably vv * vv - confirm.
+    real m1 = (mfa + mfc) + mfb; // sum of all three inputs
+    real m2 = mfc - mfa;         // difference of the two outer inputs
+    mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); // must be computed before mfa is overwritten below
+    mfb = m2 - vv * m1;
+    mfa = m1;                    // first output: the plain sum
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief backward chimera transformation \ref backwardChimera
+//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off
+//! errors. mfa, mfb and mfc are read first and then overwritten in place.
+__inline__ __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
+{   // ma and mb buffer the new mfa and mfb so that all three formulas still see the unmodified inputs.
+    real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2); // new mfa
+    real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;         // new mfb
+    mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);     // last read of the original mfa and mfb
+    mfb = mb;
+    mfa = ma;
+}
+
+} // namespace vf::gpu
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h
new file mode 100644
index 0000000000000000000000000000000000000000..37208ee59586533fa7f8ffbc269246826ed27fb8
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h
@@ -0,0 +1,198 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file KernelUtilities.h
+//! \ingroup LBM/GPUHelperFunctions
+//! \author Martin Schoenherr, Anna Wellmann, Soeren Peters
+//=======================================================================================
+#ifndef KERNEL_UTILITIES_H
+#define KERNEL_UTILITIES_H
+
+#include "LBM/LB.h"
+#include "lbm/constants/D3Q27.h"
+#include "lbm/constants/NumericConstants.h"
+
+using namespace vf::lbm::constant;
+using namespace vf::lbm::dir;
+
+namespace vf::gpu
+{
+
+__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const unsigned long long numberOfLBnodes, const bool isEvenTimestep)
+{ // Fills dist.f with one pointer per direction; the slice of direction d starts at element d * numberOfLBnodes.
+    if (isEvenTimestep) // even time step: every direction points at its own slice
+    {
+        dist.f[DIR_000] = distributionArray + DIR_000 * numberOfLBnodes;
+        dist.f[DIR_P00] = distributionArray + DIR_P00 * numberOfLBnodes;
+        dist.f[DIR_M00] = distributionArray + DIR_M00 * numberOfLBnodes;
+        dist.f[DIR_0P0] = distributionArray + DIR_0P0 * numberOfLBnodes;
+        dist.f[DIR_0M0] = distributionArray + DIR_0M0 * numberOfLBnodes;
+        dist.f[DIR_00P] = distributionArray + DIR_00P * numberOfLBnodes;
+        dist.f[DIR_00M] = distributionArray + DIR_00M * numberOfLBnodes;
+        dist.f[DIR_PP0] = distributionArray + DIR_PP0 * numberOfLBnodes;
+        dist.f[DIR_MM0] = distributionArray + DIR_MM0 * numberOfLBnodes;
+        dist.f[DIR_PM0] = distributionArray + DIR_PM0 * numberOfLBnodes;
+        dist.f[DIR_MP0] = distributionArray + DIR_MP0 * numberOfLBnodes;
+        dist.f[DIR_P0P] = distributionArray + DIR_P0P * numberOfLBnodes;
+        dist.f[DIR_M0M] = distributionArray + DIR_M0M * numberOfLBnodes;
+        dist.f[DIR_P0M] = distributionArray + DIR_P0M * numberOfLBnodes;
+        dist.f[DIR_M0P] = distributionArray + DIR_M0P * numberOfLBnodes;
+        dist.f[DIR_0PP] = distributionArray + DIR_0PP * numberOfLBnodes;
+        dist.f[DIR_0MM] = distributionArray + DIR_0MM * numberOfLBnodes;
+        dist.f[DIR_0PM] = distributionArray + DIR_0PM * numberOfLBnodes;
+        dist.f[DIR_0MP] = distributionArray + DIR_0MP * numberOfLBnodes;
+        dist.f[DIR_PPP] = distributionArray + DIR_PPP * numberOfLBnodes;
+        dist.f[DIR_MMP] = distributionArray + DIR_MMP * numberOfLBnodes;
+        dist.f[DIR_PMP] = distributionArray + DIR_PMP * numberOfLBnodes;
+        dist.f[DIR_MPP] = distributionArray + DIR_MPP * numberOfLBnodes;
+        dist.f[DIR_PPM] = distributionArray + DIR_PPM * numberOfLBnodes;
+        dist.f[DIR_MMM] = distributionArray + DIR_MMM * numberOfLBnodes;
+        dist.f[DIR_PMM] = distributionArray + DIR_PMM * numberOfLBnodes;
+        dist.f[DIR_MPM] = distributionArray + DIR_MPM * numberOfLBnodes;
+    }
+    else // odd time step: every direction points at the slice whose name has P and M swapped (DIR_000 maps to itself)
+    {
+        dist.f[DIR_M00] = distributionArray + DIR_P00 * numberOfLBnodes;
+        dist.f[DIR_P00] = distributionArray + DIR_M00 * numberOfLBnodes;
+        dist.f[DIR_0M0] = distributionArray + DIR_0P0 * numberOfLBnodes;
+        dist.f[DIR_0P0] = distributionArray + DIR_0M0 * numberOfLBnodes;
+        dist.f[DIR_00M] = distributionArray + DIR_00P * numberOfLBnodes;
+        dist.f[DIR_00P] = distributionArray + DIR_00M * numberOfLBnodes;
+        dist.f[DIR_MM0] = distributionArray + DIR_PP0 * numberOfLBnodes;
+        dist.f[DIR_PP0] = distributionArray + DIR_MM0 * numberOfLBnodes;
+        dist.f[DIR_MP0] = distributionArray + DIR_PM0 * numberOfLBnodes;
+        dist.f[DIR_PM0] = distributionArray + DIR_MP0 * numberOfLBnodes;
+        dist.f[DIR_M0M] = distributionArray + DIR_P0P * numberOfLBnodes;
+        dist.f[DIR_P0P] = distributionArray + DIR_M0M * numberOfLBnodes;
+        dist.f[DIR_M0P] = distributionArray + DIR_P0M * numberOfLBnodes;
+        dist.f[DIR_P0M] = distributionArray + DIR_M0P * numberOfLBnodes;
+        dist.f[DIR_0MM] = distributionArray + DIR_0PP * numberOfLBnodes;
+        dist.f[DIR_0PP] = distributionArray + DIR_0MM * numberOfLBnodes;
+        dist.f[DIR_0MP] = distributionArray + DIR_0PM * numberOfLBnodes;
+        dist.f[DIR_0PM] = distributionArray + DIR_0MP * numberOfLBnodes;
+        dist.f[DIR_000] = distributionArray + DIR_000 * numberOfLBnodes;
+        dist.f[DIR_PPP] = distributionArray + DIR_MMM * numberOfLBnodes;
+        dist.f[DIR_MMP] = distributionArray + DIR_PPM * numberOfLBnodes;
+        dist.f[DIR_PMP] = distributionArray + DIR_MPM * numberOfLBnodes;
+        dist.f[DIR_MPP] = distributionArray + DIR_PMM * numberOfLBnodes;
+        dist.f[DIR_PPM] = distributionArray + DIR_MMP * numberOfLBnodes;
+        dist.f[DIR_MMM] = distributionArray + DIR_PPP * numberOfLBnodes;
+        dist.f[DIR_PMM] = distributionArray + DIR_MPP * numberOfLBnodes;
+        dist.f[DIR_MPM] = distributionArray + DIR_PMP * numberOfLBnodes;
+    }
+}
+
+__inline__ __device__ void getPointersToSubgridDistances(SubgridDistances27& subgridD, real* subgridDistances, const unsigned int numberOfSubgridIndices)
+{ // Fills subgridD.q with one pointer per direction; the slice of direction d starts at element d * numberOfSubgridIndices.
+    subgridD.q[DIR_P00] = subgridDistances + DIR_P00 * numberOfSubgridIndices;
+    subgridD.q[DIR_M00] = subgridDistances + DIR_M00 * numberOfSubgridIndices;
+    subgridD.q[DIR_0P0] = subgridDistances + DIR_0P0 * numberOfSubgridIndices;
+    subgridD.q[DIR_0M0] = subgridDistances + DIR_0M0 * numberOfSubgridIndices;
+    subgridD.q[DIR_00P] = subgridDistances + DIR_00P * numberOfSubgridIndices;
+    subgridD.q[DIR_00M] = subgridDistances + DIR_00M * numberOfSubgridIndices;
+    subgridD.q[DIR_PP0] = subgridDistances + DIR_PP0 * numberOfSubgridIndices;
+    subgridD.q[DIR_MM0] = subgridDistances + DIR_MM0 * numberOfSubgridIndices;
+    subgridD.q[DIR_PM0] = subgridDistances + DIR_PM0 * numberOfSubgridIndices;
+    subgridD.q[DIR_MP0] = subgridDistances + DIR_MP0 * numberOfSubgridIndices;
+    subgridD.q[DIR_P0P] = subgridDistances + DIR_P0P * numberOfSubgridIndices;
+    subgridD.q[DIR_M0M] = subgridDistances + DIR_M0M * numberOfSubgridIndices;
+    subgridD.q[DIR_P0M] = subgridDistances + DIR_P0M * numberOfSubgridIndices;
+    subgridD.q[DIR_M0P] = subgridDistances + DIR_M0P * numberOfSubgridIndices;
+    subgridD.q[DIR_0PP] = subgridDistances + DIR_0PP * numberOfSubgridIndices;
+    subgridD.q[DIR_0MM] = subgridDistances + DIR_0MM * numberOfSubgridIndices;
+    subgridD.q[DIR_0PM] = subgridDistances + DIR_0PM * numberOfSubgridIndices;
+    subgridD.q[DIR_0MP] = subgridDistances + DIR_0MP * numberOfSubgridIndices;
+    subgridD.q[DIR_000] = subgridDistances + DIR_000 * numberOfSubgridIndices;
+    subgridD.q[DIR_PPP] = subgridDistances + DIR_PPP * numberOfSubgridIndices;
+    subgridD.q[DIR_MMP] = subgridDistances + DIR_MMP * numberOfSubgridIndices;
+    subgridD.q[DIR_PMP] = subgridDistances + DIR_PMP * numberOfSubgridIndices;
+    subgridD.q[DIR_MPP] = subgridDistances + DIR_MPP * numberOfSubgridIndices;
+    subgridD.q[DIR_PPM] = subgridDistances + DIR_PPM * numberOfSubgridIndices;
+    subgridD.q[DIR_MMM] = subgridDistances + DIR_MMM * numberOfSubgridIndices;
+    subgridD.q[DIR_PMM] = subgridDistances + DIR_PMM * numberOfSubgridIndices;
+    subgridD.q[DIR_MPM] = subgridDistances + DIR_MPM * numberOfSubgridIndices;
+}
+
+__inline__ __device__ real getEquilibriumForBC(const real& drho, const real& velocity, const real& cu_sq, const real weight)
+{ // Weighted equilibrium term for a boundary condition; cu_sq is taken from the caller and subtracted unchanged.
+    return weight * (drho + c9o2 * velocity * velocity * (c1o1 + drho) - cu_sq);
+}
+
+__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const real& feq,
+                                                                const real& omega, const real& velocity, const real weight)
+{
+    // Two q-weighted terms; the second carries the velocity term c6o1 * weight * velocity. Divides by (c1o1 - omega) and (c1o1 + q).
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2
+           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q);
+}
+
+__inline__ __device__ real getBounceBackDistributionForVeloBC(  const real& f,
+                                                                const real& velocity, const real weight)
+{
+    // Returns f reduced by the velocity term c6o1 * weight * velocity; no dependence on a subgrid distance q here.
+    return f - (c6o1 * weight * velocity);
+}
+
+__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq,
+                                                                  const real& omega)
+{
+    // Same expression as getInterpolatedDistributionForVeloBC without the velocity term. Divides by (c1o1 - omega) and (c1o1 + q).
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2
+           + (q * (f + fInverse)) / (c1o1 + q);
+}
+
+
+__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq,
+                                                                            const real& omega, const real& drho, const real& velocity, const real weight)
+{
+    // Same expression as getInterpolatedDistributionForVeloBC with weight * drho subtracted. Divides by (c1o1 - omega) and (c1o1 + q).
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2
+           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho;
+}
+
+__inline__ __device__ unsigned int getNodeIndex()
+{
+    // Linear index of the calling thread: threads are counted along x inside a block,
+    // blocks are counted first along grid x and then along grid y.
+    const unsigned threadInBlock = threadIdx.x;
+    const unsigned threadsPerBlock = blockDim.x;
+    const unsigned blocksPerGridRow = gridDim.x;
+
+    const unsigned blockNumber = blocksPerGridRow * blockIdx.y + blockIdx.x;
+    return threadsPerBlock * blockNumber + threadInBlock;
+}
+
+__inline__ __device__ bool isValidFluidNode(uint nodeType)
+{
+    const bool isPorousMedia = nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2; // porous-media types
+    return nodeType == GEO_FLUID || isPorousMedia; // a node counts as valid when it is fluid or porous media
+}
+
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h
new file mode 100644
index 0000000000000000000000000000000000000000..53990e452be06dc6840c801816e8231d26861e2e
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h
@@ -0,0 +1,136 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ScalingUtilities.h
+//! \ingroup LBM/GPUHelperFunctions
+//! \author Martin Schoenherr, Anna Wellmann
+//=======================================================================================
+#ifndef SCALING_HELPER_FUNCTIONS_H
+#define SCALING_HELPER_FUNCTIONS_H
+
+#include "LBM/LB.h" 
+#include "lbm/constants/D3Q27.h"
+#include "lbm/constants/NumericConstants.h"
+
+using namespace vf::lbm::constant;
+using namespace vf::lbm::dir;
+
+namespace vf::gpu
+{
+
+__device__ __inline__ void calculateMomentsOnSourceNodes(Distributions27 &dist, real &omega, unsigned int &k_000,
+                                                         unsigned int &k_M00, unsigned int &k_0M0, unsigned int &k_00M,
+                                                         unsigned int &k_MM0, unsigned int &k_M0M, unsigned int &k_0MM,
+                                                         unsigned int &k_MMM, real &drho, real &velocityX,
+                                                         real &velocityY, real &velocityZ, real &kxyFromfcNEQ,
+                                                         real &kyzFromfcNEQ, real &kxzFromfcNEQ, real &kxxMyyFromfcNEQ,
+                                                         real &kxxMzzFromfcNEQ)
+{ // Reads 27 distributions around one source node; writes drho, the velocity and five second-order moments to the reference arguments.
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Set local distributions (f's) on source nodes:
+    //!   each distribution is read at the node index named after the M (negative) components of its direction.
+    real f_000 = (dist.f[DIR_000])[k_000];
+    real f_P00 = (dist.f[DIR_P00])[k_000];
+    real f_M00 = (dist.f[DIR_M00])[k_M00];
+    real f_0P0 = (dist.f[DIR_0P0])[k_000];
+    real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
+    real f_00P = (dist.f[DIR_00P])[k_000];
+    real f_00M = (dist.f[DIR_00M])[k_00M];
+    real f_PP0 = (dist.f[DIR_PP0])[k_000];
+    real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
+    real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
+    real f_MP0 = (dist.f[DIR_MP0])[k_M00];
+    real f_P0P = (dist.f[DIR_P0P])[k_000];
+    real f_M0M = (dist.f[DIR_M0M])[k_M0M];
+    real f_P0M = (dist.f[DIR_P0M])[k_00M];
+    real f_M0P = (dist.f[DIR_M0P])[k_M00];
+    real f_0PP = (dist.f[DIR_0PP])[k_000];
+    real f_0MM = (dist.f[DIR_0MM])[k_0MM];
+    real f_0PM = (dist.f[DIR_0PM])[k_00M];
+    real f_0MP = (dist.f[DIR_0MP])[k_0M0];
+    real f_PPP = (dist.f[DIR_PPP])[k_000];
+    real f_MPP = (dist.f[DIR_MPP])[k_M00];
+    real f_PMP = (dist.f[DIR_PMP])[k_0M0];
+    real f_MMP = (dist.f[DIR_MMP])[k_MM0];
+    real f_PPM = (dist.f[DIR_PPM])[k_00M];
+    real f_MPM = (dist.f[DIR_MPM])[k_M0M];
+    real f_PMM = (dist.f[DIR_PMM])[k_0MM];
+    real f_MMM = (dist.f[DIR_MMM])[k_MMM];
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //!   drho is the sum of all 27 distributions; each velocity component is a signed sum multiplied by 1 / (c1o1 + drho).
+    drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
+            (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+             ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+            ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+           f_000;
+
+    real oneOverRho = c1o1 / (c1o1 + drho);
+
+    velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
+                 (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
+                oneOverRho;
+    velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
+                 (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
+                oneOverRho;
+    velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
+                 (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
+                oneOverRho;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Calculate second order moments for interpolation
+    //!   (each one: signed sum of distributions divided by (c1o1 + drho), minus a velocity product, scaled by omega)
+    // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction
+    kxyFromfcNEQ = -c3o1 * omega *
+                   ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) /
+                    (c1o1 + drho) -
+                    ((velocityX * velocityY)));
+    kyzFromfcNEQ = -c3o1 * omega *
+                   ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) /
+                    (c1o1 + drho) -
+                    ((velocityY * velocityZ)));
+    kxzFromfcNEQ = -c3o1 * omega *
+                   ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) /
+                    (c1o1 + drho) -
+                    ((velocityX * velocityZ)));
+    kxxMyyFromfcNEQ = -c3o2 * omega *
+                      ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) /
+                       (c1o1 + drho) -
+                       ((velocityX * velocityX - velocityY * velocityY)));
+    kxxMzzFromfcNEQ = -c3o2 * omega *
+                      ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) /
+                       (c1o1 + drho) -
+                       ((velocityX * velocityX - velocityZ * velocityZ)));
+}
+
+} // namespace vf::gpu
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index eea4adfda3c1ef0862f39ef58fc6e065af7bab1b..cfdbbbae040a13f94e97d40d702b93d5a1e19c86 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -15,9 +15,9 @@
 
 //////////////////////////
 //porous media
-#define GEO_PM_0		 5
-#define GEO_PM_1		 6
-#define GEO_PM_2		 7
+#define GEO_PM_0         5
+#define GEO_PM_1         6
+#define GEO_PM_2         7
 //////////////////////////
 
 #define GEO_SOLID       15
@@ -53,17 +53,33 @@
 //! \brief An enumeration for selecting a turbulence model
 enum class TurbulenceModel {
    //! - Smagorinsky
-    Smagorinsky,
+   Smagorinsky,
     //! - AMD (Anisotropic Minimum Dissipation) model, see e.g. Rozema et al., Phys. Fluids 27, 085107 (2015), https://doi.org/10.1063/1.4928700
-    AMD,
+   AMD,
     //! - QR model by Verstappen 
-    QR,
+   QR,
     //! - TODO: move the WALE model here from the old kernels
     //WALE
     //! - No turbulence model
-    None
+   None
 };
 
+//! \brief An enumeration for selecting a template of the collision kernel (CumulantK17)
+enum class CollisionTemplate {
+   //! - Default: plain collision without additional read/write
+   Default,
+   //! - WriteMacroVars: collision with write out of macroscopic variables
+   WriteMacroVars,
+   //! - ApplyBodyForce: collision with read and apply body force in the collision kernel
+   ApplyBodyForce,
+   //! - AllFeatures: collision with write out of macroscopic variables AND read and apply body force
+   AllFeatures,
+   //! - SubDomainBorder: collision on border nodes
+   SubDomainBorder
+};
+constexpr std::initializer_list<CollisionTemplate> all_CollisionTemplate  = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures, CollisionTemplate::SubDomainBorder};
+constexpr std::initializer_list<CollisionTemplate> bulk_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures};
+
 struct InitCondition
 {
    real Re;
@@ -144,6 +160,7 @@ struct InitCondition
    bool hasWallModelMonitor {false};
    bool simulatePorousMedia {false};
    bool streetVelocityFile {false};
+   real outflowPressureCorrectionFactor {0.0};
 };
 
 //Interface Cells
@@ -174,7 +191,7 @@ typedef struct OffFC{
 
 // Distribution functions g 6
 typedef struct  Distri6 {
-	real* g[6];
+   real* g[6];
 } Distributions6;
 
 // Distribution functions f 7
@@ -214,6 +231,21 @@ typedef struct QforBC{
    real *normalX, *normalY, *normalZ;
 }QforBoundaryConditions;
 
+typedef struct QforPrecursorBC{ // precursor boundary-condition data; every member has a default so a fresh object holds no indeterminate values
+   int* k = nullptr;
+   int numberOfBCnodes=0;
+   int sizeQ=0;
+   int numberOfPrecursorNodes=0;
+   uint nPrecursorReads=0;
+   uint timeStepsBetweenReads=0;
+   size_t numberOfQuantities=0;
+   real* q27[27] = {}; // all 27 entries start as null pointers
+   uint* planeNeighbor0PP = nullptr, *planeNeighbor0PM = nullptr, *planeNeighbor0MP = nullptr, *planeNeighbor0MM = nullptr;
+   real* weights0PP = nullptr, *weights0PM = nullptr, *weights0MP = nullptr,  *weights0MM = nullptr;
+   real* last = nullptr, *current = nullptr, *next = nullptr;
+   real velocityX = 0.0, velocityY = 0.0, velocityZ = 0.0;
+}QforPrecursorBoundaryConditions;
+
 //BCTemp
 typedef struct TempforBC{
    int* k;
@@ -249,57 +281,56 @@ typedef struct WMparas{
    real* Fz;
 }WallModelParameters;
 
+
 //measurePoints
 typedef struct MeasP{
-	std::string name;
-	uint k;
-	std::vector<real> Vx;
-	std::vector<real> Vy;
-	std::vector<real> Vz;
-	std::vector<real> Rho;
-	//real* Vx;
-	//real* Vy;
-	//real* Vz;
-	//real* Rho;
+   std::string name;
+   uint k;
+   std::vector<real> Vx;
+   std::vector<real> Vy;
+   std::vector<real> Vz;
+   std::vector<real> Rho;
+   //real* Vx;
+   //real* Vy;
+   //real* Vz;
+   //real* Rho;
 }MeasurePoints;
 
 //Process Neighbors
 typedef struct PN27{
-	real* f[27];
-	uint memsizeFs;
-	int* index;
-	uint memsizeIndex;
-	uint rankNeighbor;
-	int numberOfNodes;
-	int numberOfFs;
+   real* f[27];
+   uint memsizeFs;
+   int* index;
+   uint memsizeIndex;
+   uint rankNeighbor;
+   int numberOfNodes;
+   int numberOfFs;
 }ProcessNeighbor27;
 
 typedef struct PN_F3 {
-	real* g[6];
-	uint memsizeGs;
-	int* index;
-	uint memsizeIndex;
-	uint rankNeighbor;
-	int numberOfNodes;
-	int numberOfGs;
+   real* g[6];
+   uint memsizeGs;
+   int* index;
+   uint memsizeIndex;
+   uint rankNeighbor;
+   int numberOfNodes;
+   int numberOfGs;
 }ProcessNeighborF3;
 
 //path line particles
 typedef struct PLP{
-	bool *stuck, *hot;
-	real *coordXabsolut, *coordYabsolut, *coordZabsolut;
-	real *coordXlocal,   *coordYlocal,   *coordZlocal;
-	real *veloX,         *veloY,         *veloZ;
-	real *randomLocationInit;
-	uint *timestep;
-	uint *ID;
-	uint *cellBaseID;
-	uint numberOfParticles, numberOfTimestepsParticles;
-	uint memSizeID, memSizeTimestep, memSizerealAll, memSizereal, memSizeBool, memSizeBoolBC;
+   bool *stuck, *hot;
+   real *coordXabsolut, *coordYabsolut, *coordZabsolut;
+   real *coordXlocal,   *coordYlocal,   *coordZlocal;
+   real *veloX,         *veloY,         *veloZ;
+   real *randomLocationInit;
+   uint *timestep;
+   uint *ID;
+   uint *cellBaseID;
+   uint numberOfParticles, numberOfTimestepsParticles;
+   uint memSizeID, memSizeTimestep, memSizerealAll, memSizereal, memSizeBool, memSizeBoolBC;
 }PathLineParticles;
 
-
-
 //////////////////////////////////////////////////////////////////////////
 inline int vectorPosition(int i, int j, int k, int Lx, int Ly )
 {
@@ -308,7 +339,4 @@ inline int vectorPosition(int i, int j, int k, int Lx, int Ly )
 }
 //////////////////////////////////////////////////////////////////////////
 
-
 #endif
-
-
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 703e935e6edb5676c7d6e045a38e3ec20d7a4b41..84ab84ff93fa7706bcc27d7e61a18f580f3c8dbe 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -97,11 +97,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
 
     gridProvider.allocAndCopyForcing();
     gridProvider.allocAndCopyQuadricLimiters();
-    if (para->getKernelNeedsFluidNodeIndicesToRun()) {
-        gridProvider.allocArrays_fluidNodeIndices();
-        gridProvider.allocArrays_fluidNodeIndicesBorder();
-    }
-
+        
     gridProvider.setDimensions();
     gridProvider.setBoundingBox();
 
@@ -113,12 +109,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
         para->setStartTurn((unsigned int)0); // 100000
 
     restart_object = std::make_shared<ASCIIRestartObject>();
-    //////////////////////////////////////////////////////////////////////////
-    // CUDA streams
-    if (para->getUseStreams()) {
-        para->getStreamManager()->launchStreams(2u);
-        para->getStreamManager()->createCudaEvents();
-    }
+
     //////////////////////////////////////////////////////////////////////////
     VF_LOG_INFO("LB_Modell:       D3Q{}", para->getD3Qxx());
     VF_LOG_INFO("Re:              {}", para->getRe());
@@ -134,14 +125,32 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
     //////////////////////////////////////////////////////////////////////////
     allocNeighborsOffsetsScalesAndBoundaries(gridProvider);
 
+    //! Get tagged fluid nodes with corresponding value for CollisionTemplate from interactors
     for (SPtr<PreCollisionInteractor> actuator : para->getActuators()) {
         actuator->init(para.get(), &gridProvider, cudaMemoryManager.get());
+        actuator->getTaggedFluidNodes( para.get(), &gridProvider );
     }
 
     for (SPtr<PreCollisionInteractor> probe : para->getProbes()) {
         probe->init(para.get(), &gridProvider, cudaMemoryManager.get());
+        probe->getTaggedFluidNodes( para.get(), &gridProvider );
     }
 
+    //////////////////////////////////////////////////////////////////////////
+    // CUDA streams
+    if (para->getUseStreams()) {
+        para->getStreamManager()->registerStream(CudaStreamIndex::SubDomainBorder);
+        para->getStreamManager()->registerStream(CudaStreamIndex::Bulk);
+        para->getStreamManager()->launchStreams();
+        para->getStreamManager()->createCudaEvents();
+    }
+    //////////////////////////////////////////////////////////////////////////
+    
+    if (para->getKernelNeedsFluidNodeIndicesToRun())
+    {
+        gridProvider.sortFluidNodeTags();
+        gridProvider.allocArrays_taggedFluidNodes();
+    }
     //////////////////////////////////////////////////////////////////////////
     // Kernel init
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f7bb2e680c0fb3ea597239ee0cbc1772f2efe81b
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
@@ -0,0 +1,179 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DistributionDebugInspector.cu
+//! \ingroup Output
+//! \author Henrik Asmuth, Henry Korb
+//======================================================================================
+#include "DistributionDebugInspector.h"
+
+#include "Parameter/Parameter.h"
+#include "lbm/constants/D3Q27.h"
+#include "lbm/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+
+#include <cuda/CudaGrid.h>
+#include <cuda.h>
+
+#include <iostream>
+
+using namespace vf::lbm::constant;
+using namespace vf::lbm::dir;
+using namespace vf::gpu;
+
+__global__ void printFs( // debug kernel: prints drho, velocity and all 27 distributions of each selected node
+    real* distributions,
+    bool isEvenTimestep,
+    unsigned long long numberOfFluidNodes,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* typeOfGridNode,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    real minX, // min*/max*: inclusive bounds of the coordinate box whose nodes are printed
+    real maxX,
+    real minY,
+    real maxY,
+    real minZ,
+    real maxZ)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!   Threads whose index is past the node count or whose node is not GEO_FLUID return without printing.
+    const unsigned k_000 = getNodeIndex();
+
+    if (k_000 >= numberOfFluidNodes || typeOfGridNode[k_000]!=GEO_FLUID ) 
+        return;
+
+    real coordNodeX = coordX[k_000];
+    real coordNodeY = coordY[k_000];
+    real coordNodeZ = coordZ[k_000];
+
+    if( coordNodeX>=minX && coordNodeX<=maxX && // bounds are inclusive on both sides
+        coordNodeY>=minY && coordNodeY<=maxY &&
+        coordNodeZ>=minZ && coordNodeZ<=maxZ    )
+        {
+            Distributions27 dist;
+            getPointersToDistributions(dist, distributions, numberOfFluidNodes, isEvenTimestep); // NOTE(review): presumably fills dist.f[] for the current even/odd step - confirm in KernelUtilities.h
+            ////////////////////////////////////////////////////////////////////////////////
+            //! - Set neighbor indices (necessary for indirect addressing)
+            uint k_M00 = neighborX[k_000];
+            uint k_0M0 = neighborY[k_000];
+            uint k_00M = neighborZ[k_000];
+            uint k_MM0 = neighborY[k_M00];
+            uint k_M0M = neighborZ[k_M00];
+            uint k_0MM = neighborZ[k_0M0];
+            uint k_MMM = neighborZ[k_MM0];
+            ////////////////////////////////////////////////////////////////////////////////////
+            //! - Set local distributions
+            //!   Each f is read at the index built from the M components of its direction, e.g. f_PM0 at k_0M0.
+            real f_000 = (dist.f[DIR_000])[k_000];
+            real f_P00 = (dist.f[DIR_P00])[k_000];
+            real f_M00 = (dist.f[DIR_M00])[k_M00];
+            real f_0P0 = (dist.f[DIR_0P0])[k_000];
+            real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
+            real f_00P = (dist.f[DIR_00P])[k_000];
+            real f_00M = (dist.f[DIR_00M])[k_00M];
+            real f_PP0 = (dist.f[DIR_PP0])[k_000];
+            real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
+            real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
+            real f_MP0 = (dist.f[DIR_MP0])[k_M00];
+            real f_P0P = (dist.f[DIR_P0P])[k_000];
+            real f_M0M = (dist.f[DIR_M0M])[k_M0M];
+            real f_P0M = (dist.f[DIR_P0M])[k_00M];
+            real f_M0P = (dist.f[DIR_M0P])[k_M00];
+            real f_0PP = (dist.f[DIR_0PP])[k_000];
+            real f_0MM = (dist.f[DIR_0MM])[k_0MM];
+            real f_0PM = (dist.f[DIR_0PM])[k_00M];
+            real f_0MP = (dist.f[DIR_0MP])[k_0M0];
+            real f_PPP = (dist.f[DIR_PPP])[k_000];
+            real f_MPP = (dist.f[DIR_MPP])[k_M00];
+            real f_PMP = (dist.f[DIR_PMP])[k_0M0];
+            real f_MMP = (dist.f[DIR_MMP])[k_MM0];
+            real f_PPM = (dist.f[DIR_PPM])[k_00M];
+            real f_MPM = (dist.f[DIR_MPM])[k_M0M];
+            real f_PMM = (dist.f[DIR_PMM])[k_0MM];
+            real f_MMM = (dist.f[DIR_MMM])[k_MMM];
+            // drho is the plain sum of all 27 distributions; (1 + drho) is used as the density below.
+            real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
+                        (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+                        ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+                        ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+                            f_000;
+
+            real oneOverRho = c1o1 / (c1o1 + drho);
+            // Velocity components: signed sums over the P/M directions of each axis, scaled by oneOverRho.
+            real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
+                        (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
+                    oneOverRho;
+            real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
+                        (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
+                    oneOverRho;
+            real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
+                        (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
+                    oneOverRho;
+            // First printf: index, coordinates, drho (labelled "rho") and velocity. Second printf: all 27 distributions.
+            printf("Node %u \t (%f\t%f\t%f)\n rho: %f\t velo: %f\t %f \t %f\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, drho, vvx, vvy, vvz);
+            printf("Node %u \t (%f\t%f\t%f)\n f_M00\t%f\t f_000\t%f\t f_P00\t%f\n f_MP0\t%f\t f_0P0\t%f\t f_PP0\t%f\n f_MM0\t%f\t f_0M0\t%f\t f_PM0\t%f\n f_M0P\t%f\t f_00P\t%f\t f_P0P\t%f\n f_M0M\t%f\t f_00M\t%f\t f_P0M\t%f\n f_MPP\t%f\t f_0PP\t%f\t f_PPP\t%f\n f_MPM\t%f\t f_0PM\t%f\t f_PPM\t%f\n f_MMP\t%f\t f_0MP\t%f\t f_PMP\t%f\n f_MMM\t%f\t f_0MM\t%f\t f_PMM\t%f\n\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, f_M00, f_000, f_P00,f_MP0, f_0P0, f_PP0, f_MM0, f_0M0, f_PM0, f_M0P, f_00P, f_P0P, f_M0M, f_00M, f_P0M, f_MPP, f_0PP, f_PPP, f_MPM, f_0PM, f_PPM, f_MMP, f_0MP, f_PMP, f_MMM, f_0MM, f_PMM);
+
+        }
+
+}
+
+
+
+
+void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint level, uint t)
+{
+    if(this->inspectionLevel!=level) // only the level given at construction is inspected; any other level is a no-op
+        return;
+    // Host-side header line for the device output; t is used only in this message.
+    std::cout << tag << ": distributions on level " << level << " at t " << t <<  std::endl;
+    // Launch configuration built from the level's thread count and node count.
+    vf::cuda::CudaGrid cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+    // The kernel itself filters by node type and by the min/max box stored in this object.
+    printFs <<< cudaGrid.grid, cudaGrid.threads >>>(    para->getParD(level)->distributions.f[0],
+                                                        para->getParD(level)->isEvenTimestep,
+                                                        para->getParD(level)->numberOfNodes, // received by the kernel as numberOfFluidNodes
+                                                        para->getParD(level)->neighborX,
+                                                        para->getParD(level)->neighborY,
+                                                        para->getParD(level)->neighborZ,
+                                                        para->getParD(level)->typeOfGridNode,
+                                                        para->getParD(level)->coordinateX,
+                                                        para->getParD(level)->coordinateY,
+                                                        para->getParD(level)->coordinateZ,
+                                                        minX,
+                                                        maxX,
+                                                        minY,
+                                                        maxY,
+                                                        minZ,
+                                                        maxZ);
+
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
new file mode 100644
index 0000000000000000000000000000000000000000..95fea46d4eba0c2f2ff0846d22ee5da4f6c357ea
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
@@ -0,0 +1,76 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DistributionDebugInspector.h
+//! \author Henrik Asmuth
+//! \date 13/12/2022
+//! \brief Basic debugging class to print out f's in a certain area of the domain
+//!
+//! Basic debugging class. Needs to be directly added in UpdateGrid (could potentially also be added as a proper Probe in the future)
+//! How to use: Define a part of the domain via min/max x, y, and z. The DistributionDebugInspector will print out all f's in that area.
+//!
+//=======================================================================================
+
+#ifndef DISTRIBUTION_INSPECTOR_H
+#define DISTRIBUTION_INSPECTOR_H
+
+#include "Parameter/Parameter.h"
+
+
+class DistributionDebugInspector
+{
+public:
+    DistributionDebugInspector(uint _inspectionLevel, real _minX, real _maxX, real _minY, real _maxY, real _minZ, real _maxZ, std::string _tag):
+    inspectionLevel(_inspectionLevel), // all arguments are stored unchanged in the members of the same name
+    minX(_minX),
+    maxX(_maxX),
+    minY(_minY),
+    maxY(_maxY),
+    minZ(_minZ),
+    maxZ(_maxZ),
+    tag(_tag)
+    {};
+
+    ~DistributionDebugInspector() = default;
+    //! Prints the distributions of the fluid nodes inside the box when level equals inspectionLevel; otherwise does nothing.
+    void inspect(std::shared_ptr<Parameter> para, uint level, uint t);
+
+
+private:
+uint inspectionLevel; //!< grid level on which inspect() prints
+real minX;            //!< min*/max*: inclusive bounds of the inspected coordinate box
+real maxX;
+real minY;
+real maxY;
+real minZ;
+real maxZ;
+std::string tag;      //!< label written at the start of the host-side log line
+
+};
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
index c6e53ee3cbfb98f11e373ca014c7faf4e70a86f0..edf705421530bdbc9c2c9fd8c44eca6d3c5ab923 100644
--- a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
@@ -50,7 +50,7 @@ void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int tim
 
 void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep, int level)
 {
-    const unsigned int numberOfParts = para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1;
+    const unsigned int numberOfParts = (uint)para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1;
     std::vector<std::string> fname;
     std::vector<std::string> fnameMed;
 
@@ -217,8 +217,8 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
 
     for (unsigned int part = 0; part < fname.size(); part++)
     {
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         else
             sizeOfNodes = para->getlimitOfNodesForVTK();
 
@@ -340,8 +340,8 @@ void FileWriter::writeUnstrucuredGridLTConc(std::shared_ptr<Parameter> para, int
 
     for (unsigned int part = 0; part < fname.size(); part++)
     {
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         else
             sizeOfNodes = para->getlimitOfNodesForVTK();
 
@@ -449,9 +449,9 @@ void FileWriter::writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, i
     {
         //printf("\n test in if I... \n");
         //////////////////////////////////////////////////////////////////////////
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
         {
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         }
         else
         {
@@ -558,8 +558,8 @@ void FileWriter::writeUnstrucuredGridMedianLTConc(std::shared_ptr<Parameter> par
 
     for (unsigned int part = 0; part < fname.size(); part++)
     {
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         else
             sizeOfNodes = para->getlimitOfNodesForVTK();
         //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
index 0b1e9dc1c25457457eabe3013a288c4c93577dc3..4d5895b323efa1b94a5780a59c882fd5ce1be7eb 100644
--- a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
@@ -290,10 +290,10 @@ void writeNeighborXPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[u]];
-            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[u]];
-            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[index]];
+            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[index]];
+            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[index]];
 
             nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
         }
@@ -317,18 +317,18 @@ void writeNeighborXLinesDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec * 2);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1  = para->getParH(level)->coordinateX[u];
-            real x2  = para->getParH(level)->coordinateY[u];
-            real x3  = para->getParH(level)->coordinateZ[u];
-            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[u]];
-            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[u]];
-            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1  = para->getParH(level)->coordinateX[index];
+            real x2  = para->getParH(level)->coordinateY[index];
+            real x3  = para->getParH(level)->coordinateZ[index];
+            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[index]];
+            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[index]];
+            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[index]];
 
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N)));
 
-            if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) {
+            if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) {
                 cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1));
             }
         }
@@ -350,10 +350,10 @@ void writeNeighborYPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[u]];
-            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[u]];
-            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[index]];
+            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[index]];
+            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[index]];
 
             nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
         }
@@ -377,18 +377,18 @@ void writeNeighborYLinesDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec * 2);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1  = para->getParH(level)->coordinateX[u];
-            real x2  = para->getParH(level)->coordinateY[u];
-            real x3  = para->getParH(level)->coordinateZ[u];
-            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[u]];
-            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[u]];
-            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1  = para->getParH(level)->coordinateX[index];
+            real x2  = para->getParH(level)->coordinateY[index];
+            real x3  = para->getParH(level)->coordinateZ[index];
+            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[index]];
+            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[index]];
+            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[index]];
 
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N)));
 
-            if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) {
+            if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) {
                 cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1));
             }
         }
@@ -410,10 +410,10 @@ void writeNeighborZPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[u]];
-            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[u]];
-            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[index]];
+            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[index]];
+            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[index]];
 
             nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
         }
@@ -437,18 +437,18 @@ void writeNeighborZLinesDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec * 2);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1  = para->getParH(level)->coordinateX[u];
-            real x2  = para->getParH(level)->coordinateY[u];
-            real x3  = para->getParH(level)->coordinateZ[u];
-            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[u]];
-            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[u]];
-            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1  = para->getParH(level)->coordinateX[index];
+            real x2  = para->getParH(level)->coordinateY[index];
+            real x3  = para->getParH(level)->coordinateZ[index];
+            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[index]];
+            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[index]];
+            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[index]];
 
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N)));
 
-            if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) {
+            if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) {
                 cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1));
             }
         }
diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
index 83f0a677b0012153cf079b466a333acc58bda6be..57139d25ae4d046e1dd1be1f3ef5e179daf0872e 100644
--- a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
@@ -5,53 +5,57 @@
 #include "Logger.h"
 #include "Parameter/Parameter.h"
 #include "basics/utilities/UbSystem.h"
-#include "grid/NodeValues.h"
+#include "gpu/GridGenerator/grid/NodeValues.h"
 #include "lbm/constants/D3Q27.h"
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 
-#include "Utilities/FindNeighbors.h"
-#include "VirtualFluids_GPU/Communication/Communicator.h"
 #include "Core/StringUtilities/StringUtil.h"
+#include "Utilities/FindNeighbors.h"
+#include "gpu/VirtualFluids_GPU/Communication/Communicator.h"
 
 namespace NeighborDebugWriter
 {
 
-inline void writeNeighborLinkLines(Parameter *para, const int level, const uint numberOfNodes, const int direction,
-                                   const std::string &name)
+inline void writeNeighborLinkLines(LBMSimulationParameter *parH, int direction, const std::string &name,
+                                   WbWriter *writer) // passes one line (node -> neighbor in direction) per fluid node to writer
 {
     VF_LOG_INFO("Write node links in direction {}.", direction);
-    std::vector<UbTupleFloat3> nodes(numberOfNodes * 2);
-    std::vector<UbTupleInt2> cells(numberOfNodes);
 
-    for (uint position = 0; position < numberOfNodes; position++) {
-        if (para->getParH(level)->typeOfGridNode[position] != GEO_FLUID)
+    const unsigned long long numberOfNodes = parH->numberOfNodes;
+    std::vector<UbTupleFloat3> nodes;
+    nodes.reserve(numberOfNodes * 2); // upper bound: two points (node and its neighbor) per fluid node
+    std::vector<UbTupleInt2> cells;
+    cells.reserve(numberOfNodes);     // upper bound: one line per fluid node
+    // Only GEO_FLUID nodes contribute; each appends its own point, its neighbor's point and one connecting line.
+    for (size_t position = 0; position < numberOfNodes; position++) {
+        if (parH->typeOfGridNode[position] != GEO_FLUID)
             continue;
 
-        const double x1 = para->getParH(level)->coordinateX[position];
-        const double x2 = para->getParH(level)->coordinateY[position];
-        const double x3 = para->getParH(level)->coordinateZ[position];
+        const double x1 = parH->coordinateX[position];
+        const double x2 = parH->coordinateY[position];
+        const double x3 = parH->coordinateZ[position];
 
-        const uint positionNeighbor = getNeighborIndex(para->getParH(level).get(), position, direction);
+        const uint positionNeighbor = getNeighborIndex(parH, (uint)position, direction);
 
-        const double x1Neighbor = para->getParH(level)->coordinateX[positionNeighbor];
-        const double x2Neighbor = para->getParH(level)->coordinateY[positionNeighbor];
-        const double x3Neighbor = para->getParH(level)->coordinateZ[positionNeighbor];
+        const double x1Neighbor = parH->coordinateX[positionNeighbor];
+        const double x2Neighbor = parH->coordinateY[positionNeighbor];
+        const double x3Neighbor = parH->coordinateZ[positionNeighbor];
 
         nodes.emplace_back(float(x1), float(x2), float(x3));
         nodes.emplace_back(float(x1Neighbor), float(x2Neighbor), float(x3Neighbor));
 
         cells.emplace_back((int)nodes.size() - 2, (int)nodes.size() - 1);
     }
-    WbWriterVtkXmlBinary::getInstance()->writeLines(name, nodes, cells);
+    writer->writeLines(name, nodes, cells);
 }
 
 inline void writeNeighborLinkLinesDebug(Parameter *para)
 {
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (int direction = vf::lbm::dir::STARTDIR; direction <= vf::lbm::dir::ENDDIR; direction++) {
+        for (size_t direction = vf::lbm::dir::STARTDIR; direction <= vf::lbm::dir::ENDDIR; direction++) { // one output name per level and direction
             const std::string fileName = para->getFName() + "_" + StringUtil::toString<int>(level) + "_Link_" +
                                          std::to_string(direction) + "_Debug.vtk";
-            writeNeighborLinkLines(para, level, para->getParH(level)->numberOfNodes, direction, fileName);
+            writeNeighborLinkLines(para->getParH(level).get(), (int)direction, fileName, WbWriterVtkXmlBinary::getInstance());
         }
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a19ed3d723f28998f5d27cd15ebf4bab8ba061c4
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp
@@ -0,0 +1,79 @@
+#include <gmock/gmock.h>
+#include "NeighborDebugWriter.hpp"
+#include "gpu/VirtualFluids_GPU/Utilities/testUtilitiesGPU.h"
+
+class WbWriterSpy : public WbWriter // test double: stores the arguments of writeLines instead of writing a file
+{
+public:
+    std::string writeLines(const std::string & /*filename*/, std::vector<UbTupleFloat3> &nodes,
+                           std::vector<UbTupleInt2> &lines) override
+    {
+        this->nodes = nodes; // copied so the test can inspect the data after the call
+        this->lines = lines;
+        return ""; // nothing is written, so an empty string is returned
+    }
+    std::vector<UbTupleFloat3> nodes; // points received by the most recent writeLines call
+    std::vector<UbTupleInt2> lines;   // index pairs received by the most recent writeLines call
+
+    std::string getFileExtension() override { return ""; }
+};
+
+class NeighborDebugWriterTest : public testing::Test // fixture: three-node grid backed by std::vectors owned by the fixture
+{
+protected:
+    void SetUp() override
+    {
+        typeOfGridNode = std::vector<uint>(numberOfNodes, GEO_FLUID); // all nodes start as fluid; tests may overwrite entries
+        neighbors = std::vector<uint>(numberOfNodes, 2);              // every node's stored neighbor index is 2
+        coordinates = std::vector<real>(numberOfNodes, 1.0);
+        coordinates[2] = 3.0; // the same array serves x, y and z, so node 2 is at (3,3,3) and the others at (1,1,1)
+
+        parH->numberOfNodes = numberOfNodes;
+        parH->coordinateX = coordinates.data();
+        parH->coordinateY = coordinates.data();
+        parH->coordinateZ = coordinates.data();
+        parH->neighborX = neighbors.data(); // only the x neighbor array is set; NOTE(review): presumably all DIR_P00 needs - confirm in getNeighborIndex
+        parH->typeOfGridNode = typeOfGridNode.data();
+    }
+
+    const int level = 0; // not referenced by the tests in this file
+    const unsigned long long numberOfNodes = 3;
+    const uint direction = vf::lbm::dir::DIR_P00; // x
+    std::unique_ptr<LBMSimulationParameter> parH = std::make_unique<LBMSimulationParameter>();
+    WbWriterSpy writerSpy;
+    std::vector<uint> typeOfGridNode;
+    std::vector<uint> neighbors;
+    std::vector<real> coordinates;
+};
+
+TEST_F(NeighborDebugWriterTest, writeNeighborLinkLines_onlyFLuidNodes_writesAllNodes)
+{
+    UbTupleFloat3 oneCoord(1.0, 1.0, 1.0);
+    UbTupleFloat3 threeCoord(3.0, 3.0, 3.0);
+    std::vector<UbTupleFloat3> expectedNodes = { oneCoord, threeCoord, oneCoord, threeCoord, threeCoord, threeCoord }; // per node: own point, then the point of node 2
+    std::vector<UbTupleInt2> expectedLines = { UbTupleInt2(0, 1), UbTupleInt2(2, 3), UbTupleInt2(4, 5) }; // consecutive point pairs
+
+    NeighborDebugWriter::writeNeighborLinkLines(parH.get(), direction, "name", &writerSpy);
+
+    EXPECT_THAT(writerSpy.nodes.size(), testing::Eq(numberOfNodes * 2)); // two points per fluid node
+    EXPECT_THAT(writerSpy.lines.size(), testing::Eq(numberOfNodes));     // one line per fluid node
+    EXPECT_THAT(writerSpy.nodes, testing::Eq(expectedNodes));
+    EXPECT_THAT(writerSpy.lines, testing::Eq(expectedLines));
+}
+
+TEST_F(NeighborDebugWriterTest, writeNeighborLinkLines_fluidAndSolidNodes_writesOnlyFluidNodes)
+{
+    typeOfGridNode[2] = GEO_SOLID;
+    // Node 2 no longer starts a link of its own, but it is still the end point of the links from nodes 0 and 1.
+    UbTupleFloat3 oneCoord(1.0, 1.0, 1.0);
+    UbTupleFloat3 threeCoord(3.0, 3.0, 3.0);
+    std::vector<UbTupleFloat3> expectedNodes = { oneCoord, threeCoord, oneCoord, threeCoord};
+    std::vector<UbTupleInt2> expectedLines = { UbTupleInt2(0, 1), UbTupleInt2(2, 3)};
+
+    NeighborDebugWriter::writeNeighborLinkLines(parH.get(), direction, "name", &writerSpy);
+
+    EXPECT_THAT(writerSpy.nodes.size(), testing::Eq((numberOfNodes-1) * 2)); // two points for each of the two fluid nodes
+    EXPECT_THAT(writerSpy.lines.size(), testing::Eq(numberOfNodes-1));
+    EXPECT_THAT(writerSpy.nodes, testing::Eq(expectedNodes));
+    EXPECT_THAT(writerSpy.lines, testing::Eq(expectedLines));
+}
diff --git a/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp
index 456f9c148c75c27fb899f976ba4f99b109fc3d4b..ce611d25d1aa3f9e98840a0f04d9b2045d0a224f 100644
--- a/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp
@@ -33,9 +33,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+				for(size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->typeOfGridNode[u]);
+					out.writeInteger(para->getParH(level)->typeOfGridNode[index]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -46,9 +46,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+                for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->neighborX[u]);
+					out.writeInteger(para->getParH(level)->neighborX[index]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -59,9 +59,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+                for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->neighborY[u]);
+					out.writeInteger(para->getParH(level)->neighborY[index]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -72,9 +72,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+                for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->neighborZ[u]);
+					out.writeInteger(para->getParH(level)->neighborZ[index]);
 				}
 				out.writeLine();
 			} //end levelloop
diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d567c695a0e33b7a88c2c8cf3bcb88093ce5b802
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp
@@ -0,0 +1,96 @@
+#ifndef QVTKWRITER_HPP
+#define QVTKWRITER_HPP
+
+#include <array>
+#include <vector>
+
+#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/utilities/UbSystem.h"
+#include "basics/writer/WbWriterVtkXmlBinary.h"
+#include "lbm/constants/D3Q27.h"
+#include "logger/Logger.h"
+
+#include "gpu/GridGenerator/grid/NodeValues.h"
+#include "gpu/VirtualFluids_GPU/Communication/Communicator.h"
+#include "gpu/VirtualFluids_GPU/LBM/LB.h"
+#include "gpu/VirtualFluids_GPU/Parameter/Parameter.h"
+#include "gpu/VirtualFluids_GPU/Utilities/FindNeighbors.h"
+
+namespace QDebugVtkWriter
+{
+
+using namespace vf::lbm::dir;
+
+namespace
+{
+// Rescales the link from coords to neighborCoords to q times its length by moving neighborCoords (in place).
+// Nothing is changed for q == 1 (the link already has its full length) and for q <= 0.
+inline void modifyLineLengthsForQs(const std::array<double, 3> &coords, std::array<double, 3> &neighborCoords, real q)
+{
+    const bool keepFullLength = (q == 1.0 || q <= 0.0);
+    if (keepFullLength)
+        return;
+
+    for (size_t axis = 0; axis < coords.size(); axis++) {
+        const auto offset = neighborCoords[axis] - coords[axis];
+        neighborCoords[axis] = coords[axis] + q * offset;
+    }
+}
+
+// Collects one line per boundary node and direction with q > 0 (from the node towards its neighbor, scaled by q)
+// and passes all lines, together with the node index and q of each line, to the writer.
+inline void writeQLines(LBMSimulationParameter *parH, QforBoundaryConditions &boundaryQ, const std::string &filepath,
+                        WbWriter *writer)
+{
+    VF_LOG_INFO("Write qs in for boundary condition to {}.", filepath);
+
+    const auto numberOfNodes = boundaryQ.numberOfBCnodes;
+    std::vector<UbTupleFloat3> nodes;
+    nodes.reserve(numberOfNodes * 8 * 2);
+    std::vector<UbTupleInt2> lines;
+    lines.reserve(numberOfNodes * 8);
+
+    std::vector<std::string> dataNames = { "nodeIndex", "q" };
+    std::vector<std::vector<float>> lineData(2);
+
+    for (size_t i = 0; i < numberOfNodes; i++) {
+        const auto nodeIndex = boundaryQ.k[i];
+        const std::array<double, 3> coords = { parH->coordinateX[nodeIndex], parH->coordinateY[nodeIndex],
+                                               parH->coordinateZ[nodeIndex] };
+
+        // Direction 0 is skipped; ENDDIR itself is a valid direction index and has to be included.
+        for (size_t direction = 1; direction <= ENDDIR; direction++) {
+            const auto q = boundaryQ.q27[direction][i];
+            if (q <= (real)0.0) {
+                continue;
+            }
+
+            const auto positionNeighbor = getNeighborIndex(parH, (uint)nodeIndex, (int)direction);
+            std::array<double, 3> neighborCoords = { parH->coordinateX[positionNeighbor],
+                                                     parH->coordinateY[positionNeighbor],
+                                                     parH->coordinateZ[positionNeighbor] };
+            modifyLineLengthsForQs(coords, neighborCoords, q);
+
+            // Each line owns its two end points, so the line connects the two nodes appended last.
+            nodes.emplace_back(float(coords[0]), float(coords[1]), float(coords[2]));
+            nodes.emplace_back(float(neighborCoords[0]), float(neighborCoords[1]), float(neighborCoords[2]));
+            lines.emplace_back((int)nodes.size() - 2, (int)nodes.size() - 1);
+            lineData[0].push_back(float(nodeIndex));
+            lineData[1].push_back(float(q));
+        }
+    }
+
+    writer->writeLinesWithLineData(filepath, nodes, lines, dataNames, lineData);
+}
+} // namespace
+
+// Writes the q lines of boundaryQ for the given level to the file "<para->getFName()>_<fileName>.vtk".
+inline void writeQLinesDebug(Parameter *para, QforBoundaryConditions &boundaryQ, uint level, const std::string& fileName)
+{
+    const auto outputPath = para->getFName() + "_" + fileName + ".vtk";
+    writeQLines(para->getParH(level).get(), boundaryQ, outputPath, WbWriterVtkXmlBinary::getInstance());
+}
+
+} // namespace QDebugVtkWriter
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9eecb25c663fcfc8fde353b76ccf20cbcb9cf272
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp
@@ -0,0 +1,60 @@
+#include "gmock/gmock.h"
+#include <cmath>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "QDebugVtkWriter.hpp"
+#include <tuple>
+
+MATCHER(DoubleNear5, "") { // for testing::Pointwise: matches when both tuple elements differ by less than 1e-5
+    return std::abs(std::get<0>(arg) - std::get<1>(arg)) < 0.00001; // std::abs: never the integer ::abs overload
+}
+
+using namespace QDebugVtkWriter;
+
+double calcVectorLength(const std::array<double, 3> coords, const std::array<double, 3> neighborCoords)
+{ // Euclidean distance between the points coords and neighborCoords
+    return std::sqrt(std::pow((neighborCoords[0] - coords[0]), 2) + std::pow((neighborCoords[1] - coords[1]), 2) +
+                     std::pow((neighborCoords[2] - coords[2]), 2));
+}
+
+TEST(QDebugVtkWriterTest, modifyLineLengthsForQsSameCoords3)
+{
+    const std::array<double, 3> coords = { 0, 0, 0 };
+    std::array<double, 3> neighborCoords = { 1, 1, 1 };
+    const real q = 0.3;
+    const real initialLength = calcVectorLength(coords, neighborCoords);
+    // Act: neighborCoords is modified in place.
+    modifyLineLengthsForQs(coords, neighborCoords, q);
+    // Each component of the neighbor and the total link length must be scaled by q.
+    std::array<double, 3> expectedNeighborCoords = { 0.3, 0.3, 0.3 };
+    EXPECT_THAT(neighborCoords,testing::Pointwise(DoubleNear5(), expectedNeighborCoords));
+    EXPECT_THAT(calcVectorLength(coords, neighborCoords), testing::DoubleNear(q*initialLength, 0.00001));
+}
+
+TEST(QDebugVtkWriterTest, modifyLineLengthDifferentCoords)
+{
+    const std::array<double, 3> coords = { 0, 0, 0 };
+    std::array<double, 3> neighborCoords = { 1, 2, 3 };
+    const real q = 0.3;
+    const real initialLength = calcVectorLength(coords, neighborCoords);
+    // Act: neighborCoords is modified in place.
+    modifyLineLengthsForQs(coords, neighborCoords, q);
+    // Components that differ from each other must each be scaled by q, and so must the total length.
+    std::array<double, 3> expectedNeighborCoords = { 0.3, 0.6, 0.9 };
+    EXPECT_THAT(neighborCoords,testing::Pointwise(DoubleNear5(), expectedNeighborCoords));
+    EXPECT_THAT(calcVectorLength(coords, neighborCoords), testing::DoubleNear(q*initialLength, 0.00001));
+}
+
+TEST(QDebugVtkWriterTest, modifyLineLengthNegativeCoord)
+{
+    const std::array<double, 3> coords = { 0, 0, 0 };
+    std::array<double, 3> neighborCoords = { 1, 2, -3 };
+    const real q = 0.3;
+    const real initialLength = calcVectorLength(coords, neighborCoords);
+    // Act: neighborCoords is modified in place.
+    modifyLineLengthsForQs(coords, neighborCoords, q);
+    // The sign of a negative component must be kept while its magnitude is scaled by q.
+    std::array<double, 3> expectedNeighborCoords = { 0.3, 0.6, -0.9 };
+    EXPECT_THAT(neighborCoords,testing::Pointwise(DoubleNear5(), expectedNeighborCoords));
+    EXPECT_THAT(calcVectorLength(coords, neighborCoords), testing::DoubleNear(q*initialLength, 0.00001));
+}
diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp
index d006636572377477aeb3599a8ae843ea2b1e31ff..c1a3658d318eb47e84530bf437afa0bb6ba91743 100644
--- a/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp
@@ -13,8 +13,6 @@
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 #include "Core/StringUtilities/StringUtil.h"
 
-//using namespace std;
-
 namespace QDebugWriter
 {
     void writeQValues(QforBoundaryConditions &Q, int* k, int kq, const std::string &name)
diff --git a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
index 81f2c028a6bbc7cd9c077571349f4f0465a08a05..f26b4e5795466a72aa1894de37bdb066b9ab9d04 100644
--- a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
@@ -33,7 +33,7 @@ namespace UnstructuredGridWriter
 
 		bool neighborsFluid;
 
-		unsigned int allnodes = para->getParH(level)->numberOfNodes * 8;
+		unsigned long long allnodes = para->getParH(level)->numberOfNodes * 8;
 
 		nodes.resize(allnodes);
 		nodedata[0].resize(allnodes);
@@ -45,7 +45,7 @@ namespace UnstructuredGridWriter
 		unsigned int nodeCount = 0;
 		double nodeDeltaLevel = para->getParH(level)->dx;
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID /*!= GEO_VOID*/)
 			{
@@ -197,9 +197,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+			if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -340,9 +340,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -479,9 +479,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -628,9 +628,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -771,9 +771,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -896,10 +896,10 @@ namespace UnstructuredGridWriter
 		vector< vector< double > > nodedata(nodedatanames.size());
 
 		//printf("\n test for if... \n");
-		if (para->getParH(level)->numberOfNodes > limitOfNodes)
+        if ((uint)para->getParH(level)->numberOfNodes > limitOfNodes)
 		{
 			//printf("\n test in if I... \n");
-			unsigned int restOfNodes = para->getParH(level)->numberOfNodes - limitOfNodes;
+            unsigned int restOfNodes = (uint)para->getParH(level)->numberOfNodes - limitOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			//PART I
 			nodes.resize(limitOfNodes);
@@ -984,7 +984,7 @@ namespace UnstructuredGridWriter
 			nodedata[5].resize(restOfNodes);
 			//printf("\n test in if IV... \n");
 
-			for (unsigned int pos=limitOfNodes;pos<para->getParH(level)->numberOfNodes;pos++)
+			for (size_t pos = limitOfNodes; pos < para->getParH(level)->numberOfNodes; pos++)
 			{
 				if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 				{
@@ -1055,7 +1055,7 @@ namespace UnstructuredGridWriter
 			nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
 			//printf("\n test in else II... \n");
-			for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+			for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 			{
 				if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 				{
@@ -1148,7 +1148,7 @@ namespace UnstructuredGridWriter
 		unsigned int number1,number2,number3,number4,number5,number6,number7,number8;
 		bool neighborsFluid;
 		double vxmax = 0;
-		vector< vector< double > > nodedata(nodedatanames.size());
+		vector<vector<double>> nodedata(nodedatanames.size());
 
 		nodes.resize(para->getParH(level)->numberOfNodes);
 		nodedata[0].resize(para->getParH(level)->numberOfNodes);
@@ -1158,7 +1158,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1244,7 +1244,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1342,9 +1342,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+			if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -1364,7 +1364,7 @@ namespace UnstructuredGridWriter
 			nodedata[5].resize(sizeOfNodes);
 			//////////////////////////////////////////////////////////////////////////
 			//printf("\n test in if II... \n");
-			for (unsigned int pos=startpos;pos<endpos;pos++)
+			for (size_t pos = startpos; pos < endpos; pos++)
 			{
 				if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 				{
@@ -1465,9 +1465,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -1595,9 +1595,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -1728,7 +1728,7 @@ namespace UnstructuredGridWriter
 		nodedatanames.push_back("geo");
 		unsigned int number1,number2,number3,number4,number5,number6,number7,number8;
 		bool neighborsFluid;
-		vector< vector< double > > nodedata(nodedatanames.size());
+		vector< vector<double>> nodedata(nodedatanames.size());
 
 		nodes.resize(para->getParH(level)->numberOfNodes);
 		nodedata[0].resize(para->getParH(level)->numberOfNodes);
@@ -1738,7 +1738,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1825,7 +1825,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1975,9 +1975,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -2080,9 +2080,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -2192,9 +2192,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -2319,7 +2319,7 @@ namespace UnstructuredGridWriter
 			wallX3 = 0.0;
 			q      = 0.0;
 			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++)
+            for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++)
 			{
 				QQ = para->getParH(level)->geometryBC.q27[0];
 				Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes];
@@ -2423,7 +2423,7 @@ namespace UnstructuredGridWriter
 			wallX3 = 0.0;
 			q      = 0.0;
 			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++)
+            for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++)
 			{
 				QQ = para->getParH(level)->velocityBC.q27[0];
 				Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes];
@@ -2528,7 +2528,7 @@ namespace UnstructuredGridWriter
 			wallX3 = 0.0;
 			q      = 0.0;
 			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++)
+            for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++)
 			{
 				QQ = para->getParH(level)->pressureBC.q27[0];
 				Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes];
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
index 3731836f336d91c1bc4cc5f1a8f5ea0a10bee0a6..3cc771e413134e90b0d09d8eeb6dfee791f8a1e2 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
@@ -31,25 +31,39 @@
 #include <helper_cuda.h>
 #include <iostream>
 
-void CudaStreamManager::launchStreams(uint numberOfStreams)
+void CudaStreamManager::registerStream(CudaStreamIndex streamIndex)
+{
+    if (streamIndex == CudaStreamIndex::Legacy) return; // the legacy stream is never stored in the map
+    cudaStreams.emplace(streamIndex, nullptr);          // the stream handle is created later by launchStreams()
+}
+void CudaStreamManager::launchStreams()
 {
-    cudaStreams.resize(numberOfStreams);
-    for (cudaStream_t &stream : cudaStreams)
-        cudaStreamCreate(&stream);
+    for (auto &stream : cudaStreams) // creates one CUDA stream per registered entry; failures are reported
+        checkCudaErrors(cudaStreamCreate(&stream.second));
 }
 
 void CudaStreamManager::terminateStreams()
 {
-    for (cudaStream_t &stream : cudaStreams)
-        cudaStreamDestroy(stream);
+    for (auto &stream : cudaStreams) // destroys the stream of every registered entry; the entries stay in the map
+        cudaStreamDestroy(stream.second);
 }
 
-cudaStream_t &CudaStreamManager::getStream(uint streamIndex)
-{ return cudaStreams[streamIndex]; }
-
-int CudaStreamManager::getBorderStreamIndex() { return borderStreamIndex; }
+cudaStream_t &CudaStreamManager::getStream(CudaStreamIndex streamIndex, uint multiStreamIndex)
+{
+    if (streamIndex == CudaStreamIndex::Legacy) return legacyStream;
+    // Walk only the streams registered under streamIndex: stepping past them would hand out the stream of a
+    // different index or dereference end(). Unregistered or out-of-range requests fall back to the legacy stream.
+    auto range = cudaStreams.equal_range(streamIndex);
+    for (uint idx = 0; idx < multiStreamIndex && range.first != range.second; idx++)
+        ++range.first;
+    if (range.first == range.second) return legacyStream;
+    return range.first->second;
+}
 
-int CudaStreamManager::getBulkStreamIndex() { return bulkStreamIndex; }
+bool CudaStreamManager::streamIsRegistered(CudaStreamIndex streamIndex)
+{
+    return cudaStreams.find(streamIndex) != cudaStreams.end(); // true if at least one stream uses this index
+}
 
 void CudaStreamManager::createCudaEvents()
 {
@@ -61,12 +75,12 @@ void CudaStreamManager::destroyCudaEvents()
     checkCudaErrors(cudaEventDestroy(startBulkKernel)); 
 }
 
-void CudaStreamManager::triggerStartBulkKernel(int streamIndex)
+void CudaStreamManager::triggerStartBulkKernel(CudaStreamIndex streamIndex, uint multiStreamIndex)
 {
-    checkCudaErrors(cudaEventRecord(startBulkKernel, cudaStreams[streamIndex]));
+    checkCudaErrors(cudaEventRecord(startBulkKernel, getStream(streamIndex, multiStreamIndex))); // records the event on the selected stream
 }
 
-void CudaStreamManager::waitOnStartBulkKernelEvent(int streamIndex)
+void CudaStreamManager::waitOnStartBulkKernelEvent(CudaStreamIndex streamIndex, uint multiStreamIndex)
 {
-    checkCudaErrors(cudaStreamWaitEvent(cudaStreams[streamIndex], startBulkKernel));
+    checkCudaErrors(cudaStreamWaitEvent(getStream(streamIndex, multiStreamIndex), startBulkKernel)); // the selected stream waits for the event
 }
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
index c2d515ab5fe9c24388632a7ca9e1e4c78b7f1467..5c59bcd3a5e6178d6e70a63f803caf8e29f32604 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
@@ -30,32 +30,40 @@
 #ifndef STREAM_MANAGER_H
 #define STREAM_MANAGER_H
 
-#include <vector>
-#include "Core/DataTypes.h"
-
+#include <map>
+#include <cuda.h>
 #include <cuda_runtime.h>
+#include "Core/DataTypes.h"
 
+// Keys under which CudaStreamManager registers and looks up its CUDA streams.
+enum class CudaStreamIndex {
+    Legacy, // never stored in the manager's map; getStream() returns its legacyStream member for it
+    Bulk,
+    SubDomainBorder,
+    Precursor,
+    ActuatorFarm
+};
 class CudaStreamManager
-{
+{   
 private:
-    std::vector<cudaStream_t> cudaStreams;
+    std::multimap<CudaStreamIndex, cudaStream_t> cudaStreams; // a multimap: several streams may share one index
     cudaEvent_t startBulkKernel = NULL;
-    const int borderStreamIndex       = 1;
-    const int bulkStreamIndex         = 0;
+    cudaStream_t legacyStream = CU_STREAM_LEGACY; // returned by getStream() for Legacy and for unregistered indices
+
 
 public:
-    void launchStreams(uint numberOfStreams);
+    void registerStream(CudaStreamIndex streamIndex); // adds an entry for streamIndex; ignored for Legacy
+    void launchStreams(); // creates the CUDA stream of every registered entry
     void terminateStreams();
-    cudaStream_t &getStream(uint streamIndex);
-
-    int getBorderStreamIndex();
-    int getBulkStreamIndex();
+    cudaStream_t &getStream(CudaStreamIndex streamIndex, uint multiStreamIndex=0); // multiStreamIndex selects among streams sharing streamIndex
 
+    bool streamIsRegistered(CudaStreamIndex streamIndex); // true if at least one stream was registered for streamIndex
     // Events
     void createCudaEvents();
     void destroyCudaEvents();
-    void triggerStartBulkKernel(int streamIndex);
-    void waitOnStartBulkKernelEvent(int strteamIndex);
+
+    void triggerStartBulkKernel(CudaStreamIndex streamIndex, uint multiStreamIndex=0); // records startBulkKernel on the selected stream
+    void waitOnStartBulkKernelEvent(CudaStreamIndex streamIndex, uint multiStreamIndex=0); // makes the selected stream wait for startBulkKernel
 };
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 7687ec926270f23e57608ca5f3084bd26d4de20e..e593d16d6ed1f69ca65a22606a157e7ea9e6b111 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -43,6 +43,7 @@
 
 #include <basics/config/ConfigurationFile.h>
 
+#include "Logger.h"
 #include "Parameter/CudaStreamManager.h"
 
 Parameter::Parameter() : Parameter(1, 0, {}) {}
@@ -65,6 +66,8 @@ Parameter::Parameter(int numberOfProcesses, int myId, std::optional<const vf::ba
     initGridPaths();
     initGridBasePoints();
     initDefaultLBMkernelAllLevels();
+
+    this->cudaStreamManager = std::make_unique<CudaStreamManager>();
 }
 
 Parameter::~Parameter() = default;
@@ -500,10 +503,10 @@ void Parameter::initLBMSimulationParameter()
         parH[i]->sizePlaneXY      = parH[i]->nx * parH[i]->ny;
         parH[i]->sizePlaneYZ      = parH[i]->ny * parH[i]->nz;
         parH[i]->sizePlaneXZ      = parH[i]->nx * parH[i]->nz;
-        parH[i]->mem_size_real    = sizeof(real) * parH[i]->size_Mat;
-        parH[i]->mem_size_int     = sizeof(unsigned int) * parH[i]->size_Mat;
-        parH[i]->mem_size_bool    = sizeof(bool) * parH[i]->size_Mat;
-        parH[i]->mem_size_real_yz = sizeof(real) * parH[i]->ny * parH[i]->nz;
+//        parH[i]->mem_size_real    = sizeof(real) * parH[i]->size_Mat;         //DEPRECATED: related to full matrix
+//        parH[i]->mem_size_int     = sizeof(unsigned int) * parH[i]->size_Mat; //DEPRECATED: related to full matrix
+//        parH[i]->mem_size_bool    = sizeof(bool) * parH[i]->size_Mat;         //DEPRECATED: related to full matrix
+//        parH[i]->mem_size_real_yz = sizeof(real) * parH[i]->ny * parH[i]->nz; //DEPRECATED: related to full matrix
         parH[i]->isEvenTimestep        = true;
         parH[i]->startz           = parH[i]->gridNZ * ic.myProcessId;
         parH[i]->endz             = parH[i]->gridNZ * ic.myProcessId + parH[i]->gridNZ;
@@ -568,10 +571,10 @@ void Parameter::initLBMSimulationParameter()
         parD[i]->sizePlaneXY      = parH[i]->sizePlaneXY;
         parD[i]->sizePlaneYZ      = parH[i]->sizePlaneYZ;
         parD[i]->sizePlaneXZ      = parH[i]->sizePlaneXZ;
-        parD[i]->mem_size_real    = sizeof(real) * parD[i]->size_Mat;
-        parD[i]->mem_size_int     = sizeof(unsigned int) * parD[i]->size_Mat;
-        parD[i]->mem_size_bool    = sizeof(bool) * parD[i]->size_Mat;
-        parD[i]->mem_size_real_yz = sizeof(real) * parD[i]->ny * parD[i]->nz;
+        //parD[i]->mem_size_real    = sizeof(real) * parD[i]->size_Mat;          //DEPRECATED: related to full matrix
+        //parD[i]->mem_size_int     = sizeof(unsigned int) * parD[i]->size_Mat;  //DEPRECATED: related to full matrix
+        //parD[i]->mem_size_bool    = sizeof(bool) * parD[i]->size_Mat;          //DEPRECATED: related to full matrix
+        //parD[i]->mem_size_real_yz = sizeof(real) * parD[i]->ny * parD[i]->nz;  //DEPRECATED: related to full matrix
         parD[i]->isEvenTimestep        = parH[i]->isEvenTimestep;
         parD[i]->startz           = parH[i]->startz;
         parD[i]->endz             = parH[i]->endz;
@@ -586,6 +589,30 @@ void Parameter::initLBMSimulationParameter()
         parD[i]->distY            = parH[i]->distY;
         parD[i]->distZ            = parH[i]->distZ;
     }
+
+    checkParameterValidityCumulantK17();
+}
+
+// Logs a warning if the viscosity at level maxlevel or the velocity u0 exceeds the recommended CumulantK17 limits.
+void Parameter::checkParameterValidityCumulantK17() const
+{
+    if (!(this->mainKernel == "CumulantK17")) return; // the limits below only apply to the CumulantK17 kernel
+
+    const real maxRecommendedViscosity = 1.0 / 42.0;
+    const real lbViscosity = this->parH[maxlevel]->vis;
+    if (lbViscosity > maxRecommendedViscosity) {
+        VF_LOG_WARNING("The viscosity (in LB units) at level {} is {:1.3g}. It is recommended to keep it smaller than {:1.3g} "
+                       "for the CumulantK17 collision kernel.",
+                       maxlevel, lbViscosity, maxRecommendedViscosity);
+    }
+
+    const real maxRecommendedVelocity = 0.1;
+    const real lbVelocity = this->ic.u0;
+    if (lbVelocity > maxRecommendedVelocity) {
+        VF_LOG_WARNING("The velocity (in LB units) is {:1.4g}. It is recommended to keep it smaller than {:1.4g} for the "
+                       "CumulantK17 collision kernel.",
+                       lbVelocity, maxRecommendedVelocity);
+    }
+}
 
 void Parameter::copyMeasurePointsArrayToVector(int lev)
@@ -829,7 +856,7 @@ real Parameter::getLengthRatio()
 }
 real Parameter::getForceRatio()
 {
-    return this->getDensityRatio() * this->getVelocityRatio()/this->getTimeRatio();
+    return (this->getDensityRatio()+1.0) * this->getVelocityRatio()/this->getTimeRatio(); // NOTE(review): the +1.0 offset on the density ratio is new in this change - confirm it is intended
 }
 real Parameter::getScaledViscosityRatio(int level)
 {
@@ -859,6 +886,10 @@ real Parameter::getScaledForceRatio(int level)
 {
     return this->getForceRatio()*(level+1);
 }
+real Parameter::getScaledStressRatio(int level)
+{ // NOTE(review): level is not used, so the returned ratio (velocity ratio squared) is the same for every level
+    return this->getVelocityRatio()*this->getVelocityRatio();
+}
 void Parameter::setRealX(real RealX)
 {
     ic.RealX = RealX;
@@ -883,6 +914,10 @@ void Parameter::setPressOutZ(unsigned int PressOutZ)
 {
     ic.PressOutZ = PressOutZ;
 }
+void Parameter::setOutflowPressureCorrectionFactor(real pressBCrhoCorrectionFactor)
+{
+    ic.outflowPressureCorrectionFactor = pressBCrhoCorrectionFactor;
+}
 void Parameter::setMaxDev(int maxdev)
 {
     ic.maxdev = maxdev;
@@ -1607,7 +1642,7 @@ void Parameter::setOutflowBoundaryNormalZ(std::string outflowNormalZ)
 void Parameter::setMainKernel(std::string kernel)
 {
     this->mainKernel = kernel;
-    if (kernel.find("Stream") != std::string::npos || kernel.find("Redesigned") != std::string::npos)
+    if ( kernel.find("CumulantK17") != std::string::npos )
         this->kernelNeedsFluidNodeIndicesToRun = true;
 }
 void Parameter::setMultiKernelOn(bool isOn)
@@ -1720,22 +1755,22 @@ unsigned int Parameter::getSizeMat(int level)
 {
     return parH[level]->size_Mat;
 }
-unsigned int Parameter::getMemSizereal(int level)
-{
-    return parH[level]->mem_size_real;
-}
-unsigned int Parameter::getMemSizeInt(int level)
-{
-    return parH[level]->mem_size_int;
-}
-unsigned int Parameter::getMemSizeBool(int level)
-{
-    return parH[level]->mem_size_bool;
-}
-unsigned int Parameter::getMemSizerealYZ(int level)
-{
-    return parH[level]->mem_size_real_yz;
-}
+//unsigned int Parameter::getMemSizereal(int level)      //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_real;
+//}
+//unsigned int Parameter::getMemSizeInt(int level)     //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_int;
+//}
+//unsigned int Parameter::getMemSizeBool(int level)    //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_bool;
+//}
+//unsigned int Parameter::getMemSizerealYZ(int level)  //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_real_yz;
+//}
 int Parameter::getFine()
 {
     return fine;
@@ -1916,6 +1951,10 @@ unsigned int Parameter::getPressOutZ()
 {
     return ic.PressOutZ;
 }
+real Parameter::getOutflowPressureCorrectionFactor()
+{
+    return ic.outflowPressureCorrectionFactor;
+}
 int Parameter::getMaxDev()
 {
     return ic.maxdev;
@@ -2657,8 +2696,7 @@ void Parameter::setUseStreams(bool useStreams)
     if (useStreams) {
         if (this->getNumprocs() != 1) {
             this->useStreams = useStreams;
-            this->cudaStreamManager = std::make_unique<CudaStreamManager>();
-            return;
+            return; 
         } else {
             std::cout << "Can't use streams with only one process!" << std::endl;
         }
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index cbb8bfd68702bc2285947eb76e6d0adc54a5b6c1..fa45b1742f20e32258195c78b630ce95175af938 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -54,6 +54,8 @@ class ConfigurationFile;
 }
 class CudaStreamManager;
 
+class TransientBCInputFileReader;
+
 //! \struct LBMSimulationParameter
 //! \brief struct holds and manages the LB-parameter of the simulation
 //! \brief For this purpose it holds structures and pointer for host and device data, respectively.
@@ -65,16 +67,78 @@ struct LBMSimulationParameter {
     //////////////////////////////////////////////////////////////////////////
     //! \brief stores the number of threads per GPU block
     uint numberofthreads;
+    //! \brief store all distribution functions for the D3Q27
+    Distributions27 distributions;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the type for every lattice node (f.e. fluid node)
+    uint *typeOfGridNode;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief store the neighbors in +X, +Y, +Z, and in diagonal negative direction
+    //! \brief this information is important because we use an indirect addressing scheme
+    uint *neighborX, *neighborY, *neighborZ, *neighborInverse;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief store the coordinates for every lattice node
+    real *coordinateX, *coordinateY, *coordinateZ;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief store the macroscopic values (velocity, density, pressure)
+    //! \brief for every lattice node
+    real *velocityX, *velocityY, *velocityZ, *rho, *pressure;
+    //! \brief stores the value for omega
+    real omega;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the number of nodes (based on indirect addressing scheme)
+    unsigned long long numberOfNodes;
+    //! \brief stores the size of the memory consumption for real/int values of the arrays (e.g. coordinates, velocity)
+    unsigned long long memSizeRealLBnodes, memSizeLonglongLBnodes;
+
+
+
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // DEPRECATED
+    //////////////////////////////////////////////////////////////////////////
 
     // distributions///////////
     // Distributions19 d0;
     Distributions27 d0;  // DEPRECATED: distribution functions for full matrix (not sparse)
-    //! \brief store all distribution functions for the D3Q27
-    Distributions27 distributions;
+
+    // typeOfGridNode (formerly known as "geo") /////////////////////
+    int *geo; // DEPRECATED: typeOfGridNode for full matrix (not sparse)
+
+    // k///////////////////////
+    unsigned int *k; // DEPRECATED: index for full matrix
+
+    // memsize/////////////////
+    //unsigned int mem_size_real_yz;
+    //unsigned int mem_size_bool;
+    //unsigned int mem_size_int;
+    //unsigned int mem_size_real;
+
+    //////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // additional logic 
+    //////////////////////////////////////////////////////////////////////////
 
     // distributions F3////////
     Distributions6 g6;
 
+    unsigned int size_Array_SP;
+
+
+    // memsizeSP/////////////////
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+
+
     // advection diffusion //////////////////
     //! \brief store all distribution functions for the D3Q7 advection diffusion field
     Distributions7 distributionsAD7;
@@ -104,22 +168,6 @@ struct LBMSimulationParameter {
     real cStartx, cStarty, cStartz;
     real cFx, cFy, cFz;
 
-    // typeOfGridNode (formerly known as "geo") /////////////////////
-    int *geo; // DEPRECATED: typeOfGridNode for full matrix (not sparse)
-    //! \brief stores the type for every lattice node (f.e. fluid node)
-    unsigned int *typeOfGridNode;
-
-    // k///////////////////////
-    unsigned int *k; // DEPRECATED: index for full matrix
-
-    // neighbor///////////////////////////////////////////////////////////////
-    //! \brief store the neighbors in +X, +Y, +Z, and in diagonal negative direction
-    //! \brief this information is important because we use an indirect addressing scheme
-    uint *neighborX, *neighborY, *neighborZ, *neighborInverse;
-
-    // coordinates////////////////////////////////////////////////////////////
-    //! \brief store the coordinates for every lattice node
-    real *coordinateX, *coordinateY, *coordinateZ;
 
     // body forces////////////
     real *forceX_SP, *forceY_SP, *forceZ_SP;
@@ -138,11 +186,6 @@ struct LBMSimulationParameter {
 
     // macroscopic values//////
     // real *vx, *vy, *vz, *rho;  // DEPRECATED: macroscopic values for full matrix
-    //! \brief store the macroscopic values (velocity, density, pressure)
-    //! \brief for every lattice node
-    real *velocityX, *velocityY, *velocityZ, *rho, *pressure;
-    //! \brief stores the value for omega
-    real omega;
     //! \brief stores the value for viscosity (on level 0)
     real vis;
 
@@ -163,11 +206,6 @@ struct LBMSimulationParameter {
     unsigned int size_Mat;
     unsigned int sizePlaneXY, sizePlaneYZ, sizePlaneXZ;
 
-    // size of sparse matrix//////////
-    //! \brief stores the number of nodes (based on indirect addressing scheme)
-    unsigned int numberOfNodes;
-    unsigned int size_Array_SP;
-
     // size of Plane btw. 2 GPUs//////
     unsigned int sizePlaneSB, sizePlaneRB, startB, endB;
     unsigned int sizePlaneST, sizePlaneRT, startT, endT;
@@ -180,16 +218,6 @@ struct LBMSimulationParameter {
     unsigned int sizePlanePressOUT, startPOUT;
     bool isSetPress;
 
-    // memsizeSP/////////////////
-    //! \brief stores the size of the memory consumption for real/int values of the arrays (e.g. coordinates, velocity)
-    unsigned int mem_size_real_SP;
-    unsigned int mem_size_int_SP;
-
-    // memsize/////////////////
-    unsigned int mem_size_real;
-    unsigned int mem_size_int;
-    unsigned int mem_size_bool;
-    unsigned int mem_size_real_yz;
 
     // print///////////////////
     unsigned int startz, endz;
@@ -218,16 +246,16 @@ struct LBMSimulationParameter {
     OffsetFC offFCBulk;
     unsigned int mem_size_kCF_off;
     unsigned int mem_size_kFC_off;
-
-    // BC's////////////////////
+    
     //! \brief stores the boundary condition data
     QforBoundaryConditions noSlipBC, velocityBC, outflowBC, slipBC, stressBC, pressureBC;
     //! \brief number of lattice nodes for the boundary conditions
-    unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead;
+    unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead, numberOfPrecursorBCnodesRead;
 
     QforBoundaryConditions QpressX0, QpressX1, QpressY0, QpressY1, QpressZ0, QpressZ1; // DEPRECATED
     QforBoundaryConditions propellerBC;
     QforBoundaryConditions geometryBC;
+    QforPrecursorBoundaryConditions precursorBC;
     QforBoundaryConditions geometryBCnormalX, geometryBCnormalY, geometryBCnormalZ;
     QforBoundaryConditions inflowBCnormalX, inflowBCnormalY, inflowBCnormalZ;
     QforBoundaryConditions outflowBCnormalX, outflowBCnormalY, outflowBCnormalZ;
@@ -235,6 +263,8 @@ struct LBMSimulationParameter {
     unsigned int kInletQread, kOutletQread;  // DEPRECATED
 
     WallModelParameters wallModel;
+    std::vector<SPtr<TransientBCInputFileReader>> transientBCInputFileReader;
+    real outflowPressureCorrectionFactor;
 
     // testRoundoffError
     Distributions27 kDistTestRE;
@@ -367,10 +397,19 @@ struct LBMSimulationParameter {
     std::vector<EdgeNodePositions> edgeNodesYtoZ;
 
     ///////////////////////////////////////////////////////
-    uint *fluidNodeIndices;
-    uint numberOfFluidNodes;
-    uint *fluidNodeIndicesBorder;
-    uint numberOfFluidNodesBorder;
+    std::map<CollisionTemplate, uint*>    taggedFluidNodeIndices = {{CollisionTemplate::Default,        nullptr},
+                                                                    {CollisionTemplate::SubDomainBorder,nullptr},
+                                                                    {CollisionTemplate::WriteMacroVars, nullptr},
+                                                                    {CollisionTemplate::ApplyBodyForce, nullptr},
+                                                                    {CollisionTemplate::AllFeatures,    nullptr}};
+    std::map<CollisionTemplate, uint >  numberOfTaggedFluidNodes = {{CollisionTemplate::Default,        0},
+                                                                    {CollisionTemplate::SubDomainBorder,0},
+                                                                    {CollisionTemplate::WriteMacroVars, 0},
+                                                                    {CollisionTemplate::ApplyBodyForce, 0},
+                                                                    {CollisionTemplate::AllFeatures,    0}};
+
+    std::vector<CollisionTemplate> allocatedBulkFluidNodeTags = {};
+
 };
 
 //! \brief Class for LBM-parameter management
@@ -471,6 +510,7 @@ public:
     void setpressBcPos(std::string pressBcPos);
     void setpressBcQs(std::string pressBcQs);
     void setpressBcValue(std::string pressBcValue);
+    void setOutflowPressureCorrectionFactor(real correctionFactor);
     void setpressBcValues(std::string pressBcValues);
     void setvelBcQs(std::string velBcQs);
     void setvelBcValues(std::string velBcValues);
@@ -527,7 +567,6 @@ public:
     void setUseWale(bool useWale);
     void setTurbulenceModel(TurbulenceModel turbulenceModel);
     void setUseTurbulentViscosity(bool useTurbulentViscosity);
-    void setUseAMD(bool useAMD);
     void setSGSConstant(real SGSConstant);
     void setHasWallModelMonitor(bool hasWallModelMonitor);
     void setUseInitNeq(bool useInitNeq);
@@ -726,10 +765,10 @@ public:
     unsigned int getPressOutID();
     unsigned int getPressInZ();
     unsigned int getPressOutZ();
-    unsigned int getMemSizereal(int level);
-    unsigned int getMemSizeInt(int level);
-    unsigned int getMemSizeBool(int level);
-    unsigned int getMemSizerealYZ(int level);
+//    unsigned int getMemSizereal(int level);    //DEPRECATED: related to full matrix
+//    unsigned int getMemSizeInt(int level);     //DEPRECATED: related to full matrix
+//    unsigned int getMemSizeBool(int level);    //DEPRECATED: related to full matrix
+//    unsigned int getMemSizerealYZ(int level);  //DEPRECATED: related to full matrix
     unsigned int getSizeMat(int level);
     unsigned int getTimestepStart();
     unsigned int getTimestepInit();
@@ -765,6 +804,8 @@ public:
     real getScaledDensityRatio(int level);
     //! \returns the pressure ratio in SI/LB units scaled to the respective level
     real getScaledPressureRatio(int level);
+    //! \returns the stress ratio in SI/LB units scaled to the respective level
+    real getScaledStressRatio(int level);
     //! \returns the time ratio in SI/LB units scaled to the respective level
     real getScaledTimeRatio(int level);
     //! \returns the length ratio in SI/LB units scaled to the respective level
@@ -853,6 +894,7 @@ public:
     std::string getOutflowBoundaryNormalX();
     std::string getOutflowBoundaryNormalY();
     std::string getOutflowBoundaryNormalZ();
+    real getOutflowPressureCorrectionFactor();
     // CUDA random number
     curandState *getRandomState();
     // Kernel
@@ -896,6 +938,8 @@ private:
 
     void setPathAndFilename(std::string fname);
 
+    void checkParameterValidityCumulantK17() const;
+
 private:
     bool compOn{ false };
     bool diffOn{ false };
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
index 4025acf7acad362e9f0f3702cb897b9c1b6dbf3b..72a12ae880556e6e257eb69dee4e806617252629 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
@@ -1,4 +1,3 @@
-#include <gmock/gmock.h>
 #include "basics/tests/testUtilities.h"
 
 #include <filesystem>
@@ -8,7 +7,6 @@
 #include "Parameter.h"
 #include "basics/config/ConfigurationFile.h"
 
-
 TEST(ParameterTest, passingEmptyFileWithoutPath_ShouldNotThrow)
 {
     // assuming that the config files is stored parallel to this file.
@@ -37,7 +35,9 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
 
     // test optional parameter
     EXPECT_THAT(para.getOutputPath(), testing::Eq("/output/path/"));
-    EXPECT_THAT(para.getGridPath(), testing::Eq("/path/to/grid/")); // ... all grid files (e.g. multi-gpu/ multi-level) could be tested as well
+    EXPECT_THAT(
+        para.getGridPath(),
+        testing::Eq("/path/to/grid/")); // ... all grid files (e.g. multi-gpu/ multi-level) could be tested as well
     EXPECT_THAT(para.getgeoVec(), testing::Eq("/path/to/grid/geoVec.dat"));
     EXPECT_THAT(para.getMaxDev(), testing::Eq(2));
     EXPECT_THAT(para.getDevices(), testing::ElementsAreArray({ 2, 3 }));
@@ -163,7 +163,7 @@ TEST(ParameterTest, setGridPathOverridesDefaultGridPath)
     Parameter para(2, 1);
     para.setGridPath("gridPathTest");
 
-    EXPECT_THAT( para.getGridPath(), testing::Eq("gridPathTest/1/"));
+    EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/1/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/1/conc.dat"));
 }
 
@@ -177,9 +177,8 @@ TEST(ParameterTest, setGridPathOverridesConfigFile)
     auto para = Parameter(2, 0, &config);
     para.setGridPath("gridPathTest");
 
-    EXPECT_THAT( para.getGridPath(), testing::Eq("gridPathTest/0/"));
+    EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/0/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/0/conc.dat"));
-
 }
 
 TEST(ParameterTest, userMissedSlash)
@@ -189,7 +188,6 @@ TEST(ParameterTest, userMissedSlash)
 
     EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/conc.dat"));
-
 }
 
 TEST(ParameterTest, userMissedSlashMultiGPU)
@@ -199,4 +197,87 @@ TEST(ParameterTest, userMissedSlashMultiGPU)
 
     EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/0/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/0/conc.dat"));
-}
\ No newline at end of file
+}
+
+class ParameterTestCumulantK17 : public testing::Test
+{   // Fixture for the CumulantK17 parameter-validity warnings emitted by Parameter::initLBMSimulationParameter().
+protected:
+    void SetUp() override
+    {
+    }
+    // Fetches the captured stdout and checks it for the case-sensitive substring "warning"; tests call CaptureStdout() beforehand.
+    bool stdoutContainsWarning()
+    {
+        std::string output = testing::internal::GetCapturedStdout();
+        return output.find("warning") != std::string::npos;
+    }
+    // Default-constructed Parameter object that each test configures and then initializes.
+    Parameter para;
+};
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_VelocityIsTooHigh_expectWarning)
+{
+
+    para.setVelocityLB(0.11);
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout();
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_TRUE(stdoutContainsWarning());
+}
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_VelocityIsOk_expectNoWarning)
+{
+    para.setVelocityLB(0.09);
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout();
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning());
+}
+
+TEST_F(ParameterTestCumulantK17, NotCumulantK17_VelocityIsTooHigh_expectNoWarning)
+{
+    para.setVelocityLB(42);
+    para.setMainKernel("K");
+    testing::internal::CaptureStdout();
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning());
+}
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_ViscosityIsTooHigh_expectWarning)
+{
+    para.setViscosityLB(0.024);
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout();
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_TRUE(stdoutContainsWarning());
+}
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_ViscosityIsOk_expectNoWarning)
+{
+    para.setViscosityLB(0.023);
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout();
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning());
+}
+
+TEST_F(ParameterTestCumulantK17, NotCumulantK17_ViscosityIsTooHigh_expectNoWarning)
+{
+    para.setViscosityLB(10);
+    para.setMainKernel("K");
+    testing::internal::CaptureStdout();
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning());
+}
diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
index 7c710f50afb0ae07edd53ef9d68e294c7af54ac1..e0156e3fbae46282baeb1359c719a077f021cf6b 100644
--- a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
+++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
@@ -141,12 +141,12 @@ void initParticles(Parameter* para)
 			para->getParH(lev)->plp.coordZabsolut[i] = (real)zCoordVec[i]; 
 
 			// find IDs
-			for (unsigned int ii = 0; ii < para->getParH(lev)->numberOfNodes; ii++)
+			for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++)
 			{
-				if ((para->getParH(lev)->coordinateX[ii] <= para->getParH(lev)->plp.coordXabsolut[i]) &&
-					((para->getParH(lev)->plp.coordXabsolut[i] - para->getParH(lev)->coordinateX[ii]) <= dx))
+				if ((para->getParH(lev)->coordinateX[index] <= para->getParH(lev)->plp.coordXabsolut[i]) &&
+					((para->getParH(lev)->plp.coordXabsolut[i] - para->getParH(lev)->coordinateX[index]) <= dx))
 				{
-					tempID.push_back(ii);
+					tempID.push_back((int)index);
 				}
 			}
 
@@ -455,7 +455,7 @@ void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 		int counter2 = 0;
 		//////////////////////////////////////////////////////////////////////////
 		//redefine fluid nodes
-		for (uint index = 0; index < para->getParH(lev)->numberOfNodes; index++)
+		for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++)
 		{
 			if (para->getParH(lev)->typeOfGridNode[index] == GEO_FLUID_OLD)
 			{
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9447a8636e801c132df9cef2feced4b5ab4e68de
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
@@ -0,0 +1,629 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ActuatorFarm.cu
+//! \ingroup PreCollisionInteractor
+//! \author Henrik Asmuth, Henry Korb
+//======================================================================================
+#include "ActuatorFarm.h"
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+
+#include "cuda/CudaGrid.h"
+#include "VirtualFluids_GPU/GPU/GeometryUtils.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+
+#include "Parameter/Parameter.h"
+#include "Parameter/CudaStreamManager.h"
+#include "DataStructureInitializer/GridProvider.h"
+#include "GPU/CudaMemoryManager.h"
+#include "lbm/constants/NumericConstants.h"
+#include "logger/Logger.h"
+
+using namespace vf::lbm::constant;
+
+
+__host__ __device__ __inline__ uint calcNode(uint bladeNode, uint numberOfBladeNodes, uint blade, uint numberOfBlades, uint turbine, uint numberOfTurbines)
+{
+    // Flattens (turbine, blade, bladeNode) into one index with bladeNode varying fastest; numberOfTurbines is not used.
+    return bladeNode+numberOfBladeNodes*(blade+numberOfBlades*turbine);
+}
+
+__host__ __device__ __inline__ void calcTurbineBladeAndBladeNode(uint node, uint& bladeNode, uint numberOfBladeNodes, uint& blade, uint numberOfBlades, uint& turbine, uint numberOfTurbines)
+{   // Splits node = bladeNode + numberOfBladeNodes*(blade + numberOfBlades*turbine) into its parts; numberOfTurbines is not used.
+    turbine = node/(numberOfBladeNodes*numberOfBlades);
+    uint x_off = turbine*numberOfBladeNodes*numberOfBlades;    // first flat index belonging to this turbine
+    blade = (node - x_off)/numberOfBladeNodes;                 // each blade occupies numberOfBladeNodes consecutive indices
+    uint y_off = numberOfBladeNodes*blade+x_off;               // first flat index belonging to this blade
+    bladeNode = node - y_off;                                  // what remains is the position along the blade
+}
+
+__host__ __device__ __forceinline__ real distSqrd(real distX, real distY, real distZ)
+{   // Squared length of the vector (distX, distY, distZ); no square root is taken.
+    return distX*distX+distY*distY+distZ*distZ;
+}
+
+void swapArrays(real* &arr1, real* &arr2)
+{   // Exchanges the two pointers themselves; the arrays they point to are neither copied nor modified.
+    real* tmp = arr1;
+    arr1 = arr2;
+    arr2 = tmp;
+}
+
+__host__ __device__ __inline__ void rotateFromBladeToGlobal(
+                            real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
+                            real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
+                            real& azimuth, real& yaw)
+{
+    real tmpX, tmpY, tmpZ;
+    // The *_BF vector is passed through rotateAboutX3D (azimuth), then rotateAboutZ3D (yaw); the result lands in *_GF.
+    rotateAboutX3D(azimuth, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, tmpX, tmpY, tmpZ);
+    rotateAboutZ3D(yaw, tmpX, tmpY, tmpZ, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF);
+    // NOTE(review): all arguments are non-const references; presumably only *_GF is written - confirm against the helpers.
+}
+
+__host__ __device__ __inline__ void rotateFromGlobalToBlade(
+                            real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
+                            real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
+                            real& azimuth, real& yaw)
+{
+    real tmpX, tmpY, tmpZ;
+    // Outputs come first here: *_GF is read, passed through invRotateAboutZ3D (yaw) then invRotateAboutX3D (azimuth), and *_BF is filled.
+    invRotateAboutZ3D(yaw, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, tmpX, tmpY, tmpZ);
+    invRotateAboutX3D(azimuth, tmpX, tmpY, tmpZ, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF);
+}
+
+__global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, 
+                                      uint* neighborsX, uint* neighborsY, uint* neighborsZ, uint* neighborsWSB, 
+                                      real* vx, real* vy, real* vz, 
+                                      real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ,
+                                      real* bladeVelocitiesX, real* bladeVelocitiesY, real* bladeVelocitiesZ, 
+                                      uint numberOfTurbines, uint numberOfBlades, uint numberOfBladeNodes, 
+                                      real* azimuths, real* yaws, real* omegas, 
+                                      real* turbPosX, real* turbPosY, real* turbPosZ,
+                                      uint* bladeIndices, real velocityRatio, real invDeltaX)
+{
+    // One thread per blade node: interpolates the grid velocity at the node position and stores it in bladeVelocities*.
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = vf::gpu::getNodeIndex();
+
+    if(nodeIndex>=numberOfBladeNodes*numberOfBlades*numberOfTurbines) return;
+    // Split the flat index into its turbine, blade and blade-node parts.
+    uint turbine, bladeNode, blade;
+
+    calcTurbineBladeAndBladeNode(nodeIndex, bladeNode, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines);
+    // Node position as stored (suffix _BF); the _GF position is computed from it below.
+    real bladeCoordX_BF = bladeCoordsX[nodeIndex];
+    real bladeCoordY_BF = bladeCoordsY[nodeIndex];
+    real bladeCoordZ_BF = bladeCoordsZ[nodeIndex];
+
+    real bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF;
+
+    real localAzimuth = azimuths[turbine]+blade*c2Pi/numberOfBlades;
+    real yaw = yaws[turbine];
+
+    // Rotate with the blade's local azimuth and the turbine yaw, then shift by the turbine position.
+    rotateFromBladeToGlobal(bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, 
+                            bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF,
+                            localAzimuth, yaw);
+
+    bladeCoordX_GF += turbPosX[turbine];
+    bladeCoordY_GF += turbPosY[turbine];
+    bladeCoordZ_GF += turbPosZ[turbine];
+
+    uint k, ke, kn, kt;
+    uint kne, kte, ktn, ktne;
+    // bladeIndices[nodeIndex] is handed to the cell search and afterwards overwritten with the returned index k.
+    k = findNearestCellBSW(bladeIndices[nodeIndex], 
+                           gridCoordsX, gridCoordsY, gridCoordsZ, 
+                           bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, 
+                           neighborsX, neighborsY, neighborsZ, neighborsWSB);
+        
+    bladeIndices[nodeIndex] = k;
+
+    getNeighborIndicesOfBSW(k, ke, kn, kt, kne, kte, ktn, ktne, neighborsX, neighborsY, neighborsZ);
+    // Offsets of the node from grid node k, scaled by invDeltaX, feed the interpolation weights.
+    real dW, dE, dN, dS, dT, dB;
+
+    real distX = invDeltaX*(bladeCoordX_GF-gridCoordsX[k]);
+    real distY = invDeltaX*(bladeCoordY_GF-gridCoordsY[k]);
+    real distZ = invDeltaX*(bladeCoordZ_GF-gridCoordsZ[k]);
+
+    getInterpolationWeights(dW, dE, dN, dS, dT, dB, distX, distY, distZ);
+    // Each interpolated velocity component is multiplied by velocityRatio.
+    real bladeVelX_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx)*velocityRatio;
+    real bladeVelY_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy)*velocityRatio;
+    real bladeVelZ_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz)*velocityRatio;
+
+    real bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF;
+    // Rotate the interpolated velocity back with the same localAzimuth and yaw.
+    rotateFromGlobalToBlade(bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF, 
+                            bladeVelX_GF, bladeVelY_GF, bladeVelZ_GF, 
+                            localAzimuth, yaw);
+    // The Y component additionally receives omegas[turbine]*bladeCoordZ_BF.
+    bladeVelocitiesX[nodeIndex] = bladeVelX_BF;
+    bladeVelocitiesY[nodeIndex] = bladeVelY_BF+omegas[turbine]*bladeCoordZ_BF;
+    bladeVelocitiesZ[nodeIndex] = bladeVelZ_BF;
+}
+
+
+__global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ,
+                                real* gridForcesX, real* gridForcesY, real* gridForcesZ, 
+                                real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, 
+                                real* bladeForcesX, real* bladeForcesY, real* bladeForcesZ,
+                                const uint numberOfTurbines, const uint numberOfBlades, const uint numberOfBladeNodes,
+                                real* azimuths, real* yaws, real* diameters,
+                                real* turbPosX, real* turbPosY, real* turbPosZ,
+                                uint* gridIndices, uint nIndices, 
+                                const real invEpsilonSqrd, const real factorGaussian)
+{
+
+    const uint index = vf::gpu::getNodeIndex();
+
+    if(index>=nIndices) return;
+
+
+    uint gridIndex = gridIndices[index];
+
+    real gridCoordX_GF = gridCoordsX[gridIndex];
+    real gridCoordY_GF = gridCoordsY[gridIndex];
+    real gridCoordZ_GF = gridCoordsZ[gridIndex];
+
+    real gridForceX_RF = c0o1;
+    real gridForceY_RF = c0o1;
+    real gridForceZ_RF = c0o1;
+
+    real dAzimuth = c2Pi/numberOfBlades;
+
+    for(uint turbine = 0; turbine<numberOfTurbines; turbine++)
+    {
+        real radius = c1o2*diameters[turbine];
+        real gridCoordX_RF = gridCoordX_GF - turbPosX[turbine];
+        real gridCoordY_RF = gridCoordY_GF - turbPosY[turbine];
+        real gridCoordZ_RF = gridCoordZ_GF - turbPosZ[turbine];
+
+        if(distSqrd(gridCoordX_RF, gridCoordY_RF, gridCoordZ_RF)*invEpsilonSqrd > radius*radius*invEpsilonSqrd+c7o1)
+            continue;
+
+        real azimuth = azimuths[turbine];
+        real yaw = yaws[turbine];
+
+        for( uint blade=0; blade<numberOfBlades; blade++)
+        { 
+            real localAzimuth = azimuth+blade*dAzimuth;
+
+
+            real gridCoordX_BF, gridCoordY_BF, gridCoordZ_BF;
+
+            rotateFromGlobalToBlade(gridCoordX_BF, gridCoordY_BF, gridCoordZ_BF,
+                                    gridCoordX_RF, gridCoordY_RF, gridCoordZ_RF,
+                                    localAzimuth, yaw);
+            
+            uint node;
+            uint nextNode = calcNode(0, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines);
+
+            real last_z = c0o1;
+            real current_z = c0o1;
+            real next_z = bladeCoordsZ[nextNode];
+
+            real x, y, dz, eta, forceX_RF, forceY_RF, forceZ_RF;
+
+            for( uint bladeNode=0; bladeNode<numberOfBladeNodes-1; bladeNode++)
+            {
+                node = nextNode;
+                nextNode = calcNode(bladeNode+1, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines);
+
+                x = bladeCoordsX[node];
+                y = bladeCoordsY[node];
+                last_z = current_z;
+                current_z = next_z;
+                next_z = bladeCoordsZ[nextNode];
+
+                dz = c1o2*(next_z-last_z);
+
+                eta = dz*factorGaussian*exp(-distSqrd(x-gridCoordX_BF, y-gridCoordY_BF, current_z-gridCoordZ_BF)*invEpsilonSqrd);
+                rotateFromBladeToGlobal(bladeForcesX[node], bladeForcesY[node], bladeForcesZ[node], 
+                                        forceX_RF, forceY_RF, forceZ_RF, 
+                                        localAzimuth, yaw);
+                                        
+                gridForceX_RF += forceX_RF*eta;
+                gridForceY_RF += forceY_RF*eta;
+                gridForceZ_RF += forceZ_RF*eta;
+            }
+
+            //Handle last node separately
+
+            node = nextNode;
+
+            x = bladeCoordsX[node];
+            y = bladeCoordsY[node];
+            last_z = current_z;
+            current_z = next_z;
+
+            dz = c1o2*(radius-last_z);
+
+            eta = dz*factorGaussian*exp(-distSqrd(x-gridCoordX_BF, y-gridCoordY_BF, current_z-gridCoordZ_BF)*invEpsilonSqrd);
+
+            rotateFromBladeToGlobal(bladeForcesX[node], bladeForcesY[node], bladeForcesZ[node], 
+                                    forceX_RF, forceY_RF, forceZ_RF, 
+                                    localAzimuth, yaw);
+                
+            gridForceX_RF += forceX_RF*eta;
+            gridForceY_RF += forceY_RF*eta;
+            gridForceZ_RF += forceZ_RF*eta;
+        }
+    }
+
+    gridForcesX[gridIndex] += gridForceX_RF;
+    gridForcesY[gridIndex] += gridForceY_RF;
+    gridForcesZ[gridIndex] += gridForceZ_RF;
+}
+
+void ActuatorFarm::addTurbine(real posX, real posY, real posZ, real diameter, real omega, real azimuth, real yaw, std::vector<real> bladeRadii)
+{   // Queues one turbine: the values are only buffered in the preInit* vectors here; initTurbineGeometries() copies them later.
+    this->preInitDiameters.emplace_back(diameter);
+    this->preInitPosX.emplace_back(posX);
+    this->preInitPosY.emplace_back(posY);
+    this->preInitPosZ.emplace_back(posZ);
+    this->preInitAzimuths.emplace_back(azimuth);
+    this->preInitOmegas.emplace_back(omega);
+    this->preInitYaws.emplace_back(yaw);
+    this->preInitBladeRadii.emplace_back(bladeRadii);
+}
+
+void ActuatorFarm::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager)
+{   // Builds all host/device arrays of the farm. Throws std::runtime_error if body forces are not enabled in para.
+    if(!para->getIsBodyForce()) throw std::runtime_error("try to allocate ActuatorFarm but BodyForce is not set in Parameter.");
+    forceRatio = para->getForceRatio(); // must be set first: initTurbineGeometries() divides factorGaussian by it
+    initTurbineGeometries(cudaMemoryManager);
+    initBladeCoords(cudaMemoryManager);
+    initBladeIndices(para, cudaMemoryManager);
+    initBladeVelocities(cudaMemoryManager);
+    initBladeForces(cudaMemoryManager);
+    initBoundingSpheres(para, cudaMemoryManager);
+    streamIndex = 0;
+}
+
+void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, unsigned int t)
+{ // One coupling step: sample velocities at the blade nodes, compute blade forces, add them to the grid body force. `t` is unused.
+    if (level != this->level) return;
+    // Only the grid level this farm was constructed for is handled. Both kernels below are launched on this stream.
+    cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::ActuatorFarm, this->streamIndex);
+
+    if(useHostArrays) cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
+    // Launch configuration sized by the total number of blade nodes.
+    vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfNodes);
+
+    interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads, 0, stream >>>(
+        para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ,        
+        para->getParD(this->level)->neighborX, para->getParD(this->level)->neighborY, para->getParD(this->level)->neighborZ, para->getParD(this->level)->neighborInverse,
+        para->getParD(this->level)->velocityX, para->getParD(this->level)->velocityY, para->getParD(this->level)->velocityZ,
+        this->bladeCoordsXDCurrentTimestep, this->bladeCoordsYDCurrentTimestep, this->bladeCoordsZDCurrentTimestep,  
+        this->bladeVelocitiesXDCurrentTimestep, this->bladeVelocitiesYDCurrentTimestep, this->bladeVelocitiesZDCurrentTimestep,  
+        this->numberOfTurbines, this->numberOfBlades, this->numberOfBladeNodes,
+        this->azimuthsD, this->yawsD, this->omegasD, 
+        this->turbinePosXD, this->turbinePosYD, this->turbinePosZD,
+        this->bladeIndicesD, para->getVelocityRatio(), this->invDeltaX);
+    // Wait for the kernel before the velocities are copied back (host-array mode) and the blade forces are computed.
+    cudaStreamSynchronize(stream);
+    if(useHostArrays) cudaMemoryManager->cudaCopyBladeVelocitiesDtoH(this);
+    this->calcBladeForces();
+    this->swapDeviceArrays();
+    // NOTE(review): assuming swapArrays exchanges pointers, the "CurrentTimestep" arrays used below are the buffers that were "PreviousTimestep" during interpolation - confirm intended.
+    if(useHostArrays) cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
+    // Launch configuration sized by the number of bounding-sphere node indices.
+    vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfIndices);
+
+    applyBodyForces<<<sphereGrid.grid, sphereGrid.threads, 0, stream>>>(
+        para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ,        
+        para->getParD(this->level)->forceX_SP, para->getParD(this->level)->forceY_SP, para->getParD(this->level)->forceZ_SP,        
+        this->bladeCoordsXDCurrentTimestep, this->bladeCoordsYDCurrentTimestep, this->bladeCoordsZDCurrentTimestep,  
+        this->bladeForcesXDCurrentTimestep, this->bladeForcesYDCurrentTimestep, this->bladeForcesZDCurrentTimestep,
+        this->numberOfTurbines, this->numberOfBlades, this->numberOfBladeNodes,
+        this->azimuthsD, this->yawsD, this->diametersD,
+        this->turbinePosXD, this->turbinePosYD, this->turbinePosZD,
+        this->boundingSphereIndicesD, this->numberOfIndices,
+        this->invEpsilonSqrd, this->factorGaussian);
+    cudaMemoryManager->cudaCopyBladeOrientationsHtoD(this); // NOTE(review): issued before the sync below while the kernel above reads azimuthsD/yawsD - confirm the copy cannot overlap with it
+    cudaStreamSynchronize(stream);
+}
+
+
+void ActuatorFarm::free(Parameter* para, CudaMemoryManager* cudaMemoryManager)
+{ // Hands every array group allocated by the init*() methods back to the memory manager; `para` is unused.
+    cudaMemoryManager->cudaFreeBladeGeometries(this);   // allocated in initTurbineGeometries()
+    cudaMemoryManager->cudaFreeBladeOrientations(this); // allocated in initTurbineGeometries()
+    cudaMemoryManager->cudaFreeBladeCoords(this);       // allocated in initBladeCoords()
+    cudaMemoryManager->cudaFreeBladeVelocities(this);   // allocated in initBladeVelocities()
+    cudaMemoryManager->cudaFreeBladeForces(this);       // allocated in initBladeForces()
+    cudaMemoryManager->cudaFreeBladeIndices(this);      // allocated in initBladeIndices()
+    cudaMemoryManager->cudaFreeSphereIndices(this);     // allocated in initBoundingSpheres()
+}
+
+
+void ActuatorFarm::calcForcesEllipticWing()
+{
+    // Host-side model force: each blade is an elliptic wing with lift coefficient Cl = c1o1 and drag
+    // coefficient Cd = c0o1. Reads bladeVelocities{X,Y}H, writes bladeForces{X,Y,Z}H for every blade node
+    // and finally advances the azimuth of each turbine by deltaT*omega.
+    const real Cl = c1o1;
+    const real Cd = c0o1;
+    const real c0 = 20*c1o10; // scale of the elliptic chord distribution
+    for(uint turbine=0; turbine<this->numberOfTurbines; turbine++)
+    {
+        const real diameter = this->diametersH[turbine];
+        for( uint blade=0; blade<this->numberOfBlades; blade++)
+        {
+            for( uint bladeNode=0; bladeNode<this->numberOfBladeNodes; bladeNode++)
+            {
+                const uint node = calcNode(bladeNode, this->numberOfBladeNodes, blade, this->numberOfBlades, turbine, this->numberOfTurbines);
+                // Radii are stored once per turbine (see initTurbineGeometries); indexing by bladeNode alone would use turbine 0 for all.
+                const real radius = this->bladeRadiiH[calcNode(bladeNode, this->numberOfBladeNodes, 0, 1, turbine, this->numberOfTurbines)];
+
+                const real u_rel = this->bladeVelocitiesXH[node];
+                const real v_rel = this->bladeVelocitiesYH[node];
+                const real u_rel_sq = u_rel*u_rel+v_rel*v_rel;
+                const real phi = atan2(u_rel, v_rel);
+                const real tmp = c4o1*radius/diameter-c1o1; // maps radius in [0, diameter/2] to [-1, 1]
+                const real c = c0 * sqrt( c1o1- tmp*tmp );
+                const real Cn = Cl*cos(phi)+Cd*sin(phi);
+                const real Ct = Cl*sin(phi)-Cd*cos(phi);
+                const real fx = c1o2*u_rel_sq*c*this->density*Cn;
+                const real fy = c1o2*u_rel_sq*c*this->density*Ct;
+                this->bladeForcesXH[node] = -fx;
+                this->bladeForcesYH[node] = -fy;
+                this->bladeForcesZH[node] = c0o1;
+            }
+        }
+        azimuthsH[turbine] = azimuthsH[turbine]+deltaT*omegasH[turbine];
+    }
+}
+
+void ActuatorFarm::calcBladeForces()
+{   // Default force model (the method is declared virtual in ActuatorFarm.h): delegate to the elliptic-wing model.
+    calcForcesEllipticWing();
+}
+
+void ActuatorFarm::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider)
+{   // Passes the bounding-sphere node indices to the grid provider, tagged CollisionTemplate::AllFeatures on this farm's level.
+    std::vector<uint> taggedIndices(boundingSphereIndicesH, boundingSphereIndicesH + numberOfIndices);
+    gridProvider->tagFluidNodeIndices(taggedIndices, CollisionTemplate::AllFeatures, level);
+}
+
+
+void ActuatorFarm::initTurbineGeometries(CudaMemoryManager* cudaMemoryManager)
+{
+    // Sizes the farm from the queued turbines, fills the host geometry/orientation arrays, uploads them and sets factorGaussian.
+    this->numberOfTurbines = uint(this->preInitDiameters.size());
+    this->numberOfNodes = numberOfTurbines*numberOfBladeNodes*numberOfBlades;
+    // The copy loop below reads numberOfBladeNodes radii per turbine; reject shorter lists before anything is allocated.
+    for(const std::vector<real>& radii : this->preInitBladeRadii)
+        if(radii.size() < this->numberOfBladeNodes) throw std::runtime_error("ActuatorFarm::initTurbineGeometries: fewer blade radii than blade nodes given for a turbine.");
+    cudaMemoryManager->cudaAllocBladeGeometries(this);
+    cudaMemoryManager->cudaAllocBladeOrientations(this);
+
+    for(uint turbine=0; turbine<this->numberOfTurbines; turbine++)
+    {
+        for(uint node=0; node<this->numberOfBladeNodes; node++) // radii are stored once per turbine (blade 0 of 1)
+            this->bladeRadiiH[calcNode(node, numberOfBladeNodes, 0, 1, turbine, numberOfTurbines)] = this->preInitBladeRadii[turbine][node];
+    }
+    std::copy(preInitPosX.begin(), preInitPosX.end(), turbinePosXH);
+    std::copy(preInitPosY.begin(), preInitPosY.end(), turbinePosYH);
+    std::copy(preInitPosZ.begin(), preInitPosZ.end(), turbinePosZH);
+    std::copy(preInitDiameters.begin(), preInitDiameters.end(), diametersH);
+
+    cudaMemoryManager->cudaCopyBladeGeometriesHtoD(this);
+    std::copy(preInitAzimuths.begin(), preInitAzimuths.end(), this->azimuthsH);
+    std::copy(preInitOmegas.begin(), preInitOmegas.end(), this->omegasH);
+    std::copy(preInitYaws.begin(), preInitYaws.end(), this->yawsH);
+
+    cudaMemoryManager->cudaCopyBladeOrientationsHtoD(this);
+    this->factorGaussian = pow(this->epsilon*sqrt(cPi),-c3o1)/this->forceRatio;
+}
+
+void ActuatorFarm::initBladeCoords(CudaMemoryManager* cudaMemoryManager)
+{   // Initial blade-node coordinates: x = y = 0 and z = radius of the respective blade node, identical for all blades of a turbine.
+    cudaMemoryManager->cudaAllocBladeCoords(this);
+    for(uint turbine=0; turbine<numberOfTurbines; turbine++)
+    {
+        for(uint bladeNode=0; bladeNode<numberOfBladeNodes; bladeNode++)
+        {
+            const real nodeRadius = bladeRadiiH[calcNode(bladeNode, numberOfBladeNodes, 0, 1, turbine, numberOfTurbines)];
+            for(uint blade=0; blade<numberOfBlades; blade++)
+            {
+                const uint node = calcNode(bladeNode, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines);
+                bladeCoordsXH[node] = c0o1;
+                bladeCoordsYH[node] = c0o1;
+                bladeCoordsZH[node] = nodeRadius;
+            }
+        }
+    }
+    // Upload, swap the Current/Previous device arrays, upload again - presumably so that both buffers hold the initial values.
+    cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
+    swapArrays(bladeCoordsXDCurrentTimestep, bladeCoordsXDPreviousTimestep);
+    swapArrays(bladeCoordsYDCurrentTimestep, bladeCoordsYDPreviousTimestep);
+    swapArrays(bladeCoordsZDCurrentTimestep, bladeCoordsZDPreviousTimestep);
+    cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
+}
+
+void ActuatorFarm::initBladeVelocities(CudaMemoryManager* cudaMemoryManager)
+{
+    cudaMemoryManager->cudaAllocBladeVelocities(this);
+    // All blade-node velocities start at c0o1.
+    std::fill(bladeVelocitiesXH, bladeVelocitiesXH+numberOfNodes, c0o1);
+    std::fill(bladeVelocitiesYH, bladeVelocitiesYH+numberOfNodes, c0o1);
+    std::fill(bladeVelocitiesZH, bladeVelocitiesZH+numberOfNodes, c0o1);
+    // Upload, swap the Current/Previous device arrays, upload again - presumably so that both buffers hold the initial values.
+    cudaMemoryManager->cudaCopyBladeVelocitiesHtoD(this);
+    swapArrays(bladeVelocitiesXDCurrentTimestep, bladeVelocitiesXDPreviousTimestep);
+    swapArrays(bladeVelocitiesYDCurrentTimestep, bladeVelocitiesYDPreviousTimestep);
+    swapArrays(bladeVelocitiesZDCurrentTimestep, bladeVelocitiesZDPreviousTimestep);
+    cudaMemoryManager->cudaCopyBladeVelocitiesHtoD(this);
+}
+
+void ActuatorFarm::initBladeForces(CudaMemoryManager* cudaMemoryManager)
+{
+    cudaMemoryManager->cudaAllocBladeForces(this);
+    // All blade-node forces start at c0o1.
+    std::fill(bladeForcesXH, bladeForcesXH+numberOfNodes, c0o1);
+    std::fill(bladeForcesYH, bladeForcesYH+numberOfNodes, c0o1);
+    std::fill(bladeForcesZH, bladeForcesZH+numberOfNodes, c0o1);
+    // Upload, swap the Current/Previous device arrays, upload again - presumably so that both buffers hold the initial values.
+    cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
+    swapArrays(bladeForcesXDCurrentTimestep, bladeForcesXDPreviousTimestep);
+    swapArrays(bladeForcesYDCurrentTimestep, bladeForcesYDPreviousTimestep);
+    swapArrays(bladeForcesZDCurrentTimestep, bladeForcesZDPreviousTimestep);
+    cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
+}
+
+void ActuatorFarm::initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemoryManager)
+{
+    cudaMemoryManager->cudaAllocBladeIndices(this);
+    // Every blade node starts with index 1; `para` is not used here.
+    std::fill(bladeIndicesH, bladeIndicesH+numberOfNodes, 1);
+
+    cudaMemoryManager->cudaCopyBladeIndicesHtoD(this);
+}
+
+void ActuatorFarm::initBoundingSpheres(Parameter* para, CudaMemoryManager* cudaMemoryManager)
+{
+    // Gathers the indices of all nodes of this->level lying within diameter/2 + 4*epsilon of a turbine position and uploads them.
+    std::vector<uint> nodesInSpheres;
+    for(uint turbine=0; turbine<this->numberOfTurbines; turbine++)
+    {
+        const real sphereRadius = c1o2*this->diametersH[turbine]+c4o1*this->epsilon;
+        const real sphereRadiusSqrd = sphereRadius*sphereRadius;
+        const real posX = this->turbinePosXH[turbine];
+        const real posY = this->turbinePosYH[turbine];
+        const real posZ = this->turbinePosZH[turbine];
+        // Plausibility bound: volume of the sphere with its radius reduced by deltaX, at one node per deltaX^3.
+        const uint minimumNumberOfNodesPerSphere = (uint)(c4o3*cPi*pow(sphereRadius-this->deltaX, c3o1)/pow(this->deltaX, c3o1));
+        uint nodesInThisSphere = 0;
+        // Node indices run from 1 to numberOfNodes inclusive.
+        for (size_t pos = 1; pos <= para->getParH(this->level)->numberOfNodes; pos++)
+        {
+            const real distX = para->getParH(this->level)->coordinateX[pos]-posX;
+            const real distY = para->getParH(this->level)->coordinateY[pos]-posY;
+            const real distZ = para->getParH(this->level)->coordinateZ[pos]-posZ;
+            if(distSqrd(distX,distY,distZ) < sphereRadiusSqrd)
+            {
+                nodesInSpheres.push_back((uint)pos);
+                nodesInThisSphere++;
+            }
+        }
+
+        if(nodesInThisSphere<minimumNumberOfNodesPerSphere)
+        {
+            VF_LOG_CRITICAL("Found only {} nodes in bounding sphere of turbine no. {}, expected at least {}!", nodesInThisSphere, turbine, minimumNumberOfNodesPerSphere);
+            throw std::runtime_error("ActuatorFarm::initBoundingSpheres: Turbine bounding sphere partially out of domain.");
+        }
+    }
+    // A node inside several overlapping spheres was pushed once per turbine. applyBodyForces sums over all turbines for
+    // each list entry and adds the result to the node, so a duplicate entry would apply the force repeatedly: keep each node once.
+    std::sort(nodesInSpheres.begin(), nodesInSpheres.end());
+    nodesInSpheres.erase(std::unique(nodesInSpheres.begin(), nodesInSpheres.end()), nodesInSpheres.end());
+    this->numberOfIndices = uint(nodesInSpheres.size());
+    cudaMemoryManager->cudaAllocSphereIndices(this);
+    std::copy(nodesInSpheres.begin(), nodesInSpheres.end(), this->boundingSphereIndicesH);
+    cudaMemoryManager->cudaCopySphereIndicesHtoD(this);
+}
+
+void ActuatorFarm::setAllAzimuths(real* _azimuths)
+{   // Copies numberOfTurbines values from _azimuths into the host-side array azimuthsH.
+    std::copy(_azimuths, _azimuths+this->numberOfTurbines, this->azimuthsH);
+}
+
+void ActuatorFarm::setAllOmegas(real* _omegas)
+{   // Copies numberOfTurbines values from _omegas into the host-side array omegasH.
+    std::copy(_omegas, _omegas+this->numberOfTurbines, this->omegasH);
+}
+
+void ActuatorFarm::setAllYaws(real* _yaws)
+{   // Copies numberOfTurbines values from _yaws into the host-side array yawsH.
+    std::copy(_yaws, _yaws+this->numberOfTurbines, this->yawsH);
+}
+
+void ActuatorFarm::setAllBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
+{   // Overwrites the host-side blade-node coordinates of the whole farm; each input must hold numberOfNodes values.
+    std::copy(_bladeCoordsX, _bladeCoordsX+this->numberOfNodes, this->bladeCoordsXH);
+    std::copy(_bladeCoordsY, _bladeCoordsY+this->numberOfNodes, this->bladeCoordsYH);
+    std::copy(_bladeCoordsZ, _bladeCoordsZ+this->numberOfNodes, this->bladeCoordsZH);
+}
+
+void ActuatorFarm::setAllBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ)
+{   // Overwrites the host-side blade-node velocities of the whole farm; each input must hold numberOfNodes values.
+    std::copy(_bladeVelocitiesX, _bladeVelocitiesX+this->numberOfNodes, this->bladeVelocitiesXH);
+    std::copy(_bladeVelocitiesY, _bladeVelocitiesY+this->numberOfNodes, this->bladeVelocitiesYH);
+    std::copy(_bladeVelocitiesZ, _bladeVelocitiesZ+this->numberOfNodes, this->bladeVelocitiesZH);
+}
+
+void ActuatorFarm::setAllBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ)
+{   // Overwrites the host-side blade-node forces of the whole farm; each input must hold numberOfNodes values.
+    std::copy(_bladeForcesX, _bladeForcesX+this->numberOfNodes, this->bladeForcesXH);
+    std::copy(_bladeForcesY, _bladeForcesY+this->numberOfNodes, this->bladeForcesYH);
+    std::copy(_bladeForcesZ, _bladeForcesZ+this->numberOfNodes, this->bladeForcesZH);
+}
+void ActuatorFarm::setTurbineBladeCoords(uint turbine, real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
+{   // Overwrites the host-side blade-node coordinates of one turbine (numberOfBladeNodes*numberOfBlades values per component).
+    std::copy_n(_bladeCoordsX, numberOfBlades*numberOfBladeNodes, getTurbineBladeCoordsX(turbine));
+    std::copy_n(_bladeCoordsY, numberOfBlades*numberOfBladeNodes, getTurbineBladeCoordsY(turbine));
+    std::copy_n(_bladeCoordsZ, numberOfBlades*numberOfBladeNodes, getTurbineBladeCoordsZ(turbine));
+}
+
+void ActuatorFarm::setTurbineBladeVelocities(uint turbine, real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ)
+{   // Overwrites the host-side blade-node velocities of one turbine (numberOfBladeNodes*numberOfBlades values per component).
+    std::copy_n(_bladeVelocitiesX, numberOfBlades*numberOfBladeNodes, getTurbineBladeVelocitiesX(turbine));
+    std::copy_n(_bladeVelocitiesY, numberOfBlades*numberOfBladeNodes, getTurbineBladeVelocitiesY(turbine));
+    std::copy_n(_bladeVelocitiesZ, numberOfBlades*numberOfBladeNodes, getTurbineBladeVelocitiesZ(turbine));
+}
+
+void ActuatorFarm::setTurbineBladeForces(uint turbine, real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ)
+{   // Overwrites the host-side blade-node forces of one turbine (numberOfBladeNodes*numberOfBlades values per component).
+    std::copy_n(_bladeForcesX, numberOfBlades*numberOfBladeNodes, getTurbineBladeForcesX(turbine));
+    std::copy_n(_bladeForcesY, numberOfBlades*numberOfBladeNodes, getTurbineBladeForcesY(turbine));
+    std::copy_n(_bladeForcesZ, numberOfBlades*numberOfBladeNodes, getTurbineBladeForcesZ(turbine));
+}
+
+void ActuatorFarm::swapDeviceArrays()
+{   // Passes every PreviousTimestep/CurrentTimestep pair of device arrays to swapArrays(): first the blade coordinates,
+    swapArrays(bladeCoordsXDPreviousTimestep, bladeCoordsXDCurrentTimestep);
+    swapArrays(bladeCoordsYDPreviousTimestep, bladeCoordsYDCurrentTimestep);
+    swapArrays(bladeCoordsZDPreviousTimestep, bladeCoordsZDCurrentTimestep);
+    // then the blade velocities,
+    swapArrays(bladeVelocitiesXDPreviousTimestep, bladeVelocitiesXDCurrentTimestep);
+    swapArrays(bladeVelocitiesYDPreviousTimestep, bladeVelocitiesYDCurrentTimestep);
+    swapArrays(bladeVelocitiesZDPreviousTimestep, bladeVelocitiesZDCurrentTimestep);
+    // and finally the blade forces.
+    swapArrays(bladeForcesXDPreviousTimestep, bladeForcesXDCurrentTimestep);
+    swapArrays(bladeForcesYDPreviousTimestep, bladeForcesYDCurrentTimestep);
+    swapArrays(bladeForcesZDPreviousTimestep, bladeForcesZDCurrentTimestep);
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
new file mode 100644
index 0000000000000000000000000000000000000000..8e21cdb6b21efd323f6723e21d6b28614109f1ec
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
@@ -0,0 +1,197 @@
+#ifndef ActuatorFarm_H
+#define ActuatorFarm_H
+
+#include "PreCollisionInteractor.h"
+#include "PointerDefinitions.h"
+#include "lbm/constants/NumericConstants.h"
+#include <cmath>
+#include <stdexcept>
+#include <vector>
+
+using namespace vf::lbm::constant;
+class Parameter;
+class GridProvider;
+
+class ActuatorFarm : public PreCollisionInteractor
+{   // Pre-collision interactor holding a set of turbines whose blades are discretised into blade nodes (see ActuatorFarm.cu).
+public:
+    ActuatorFarm(
+        const uint _nBlades,       // number of blades per turbine
+        const real _density,       // density used in the blade force model
+        const uint _nBladeNodes,   // number of nodes per blade
+        const real _epsilon,       // width of the Gaussian force kernel; must not be smaller than the level's deltaX
+        int _level,                // grid level the farm acts on
+        const real _deltaT,        // time step of level 0; scaled by 2^-level below
+        const real _deltaX,        // grid spacing of level 0; scaled by 2^-level below
+        const bool _useHostArrays  // if true, interact() copies blade coords, velocities and forces between host and device
+    ) : // initialisers are listed in declaration order (base class first), i.e. the order in which they are executed
+        PreCollisionInteractor(),
+        useHostArrays(_useHostArrays),
+        density(_density),
+        numberOfBladeNodes(_nBladeNodes),
+        numberOfBlades(_nBlades),
+        numberOfTurbines(0),
+        epsilon(_epsilon),
+        level(_level),
+        numberOfIndices(0),
+        numberOfNodes(0)
+    {
+        this->deltaT = _deltaT*exp2(-this->level);
+        this->deltaX = _deltaX*exp2(-this->level);
+        this->invEpsilonSqrd = c1o1/(epsilon*epsilon);
+        this->invDeltaX = c1o1/this->deltaX;
+        if(this->epsilon<this->deltaX)
+            throw std::runtime_error("ActuatorFarm::ActuatorFarm: epsilon needs to be larger than dx!");
+    }
+    // addTurbine() only queues a turbine; init() allocates and fills the arrays, interact() does the coupling, free() releases.
+    ~ActuatorFarm() override = default;
+    void addTurbine(real turbinePosX, real turbinePosY, real turbinePosZ, real diameter, real omega, real azimuth, real yaw, std::vector<real> bladeRadii);
+    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override;
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
+    void free(Parameter* para, CudaMemoryManager* cudaManager) override;
+    void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override;
+
+    void write(uint t);
+
+    real getDensity(){ return this->density; };
+    real getDeltaT(){ return this->deltaT; };
+    real getDeltaX(){ return this->deltaX; };
+
+    uint getNumberOfTurbines(){ return this->numberOfTurbines; };
+    uint getNumberOfNodesPerBlade(){ return this->numberOfBladeNodes; };
+    uint getNumberOfBladesPerTurbine(){ return this->numberOfBlades; };
+
+    uint getNumberOfIndices(){ return this->numberOfIndices; };
+    uint getNumberOfNodes(){ return this->numberOfNodes; };
+
+    real* getAllAzimuths(){ return azimuthsH; };
+    real* getAllOmegas(){ return omegasH; };
+    real* getAllYaws(){ return yawsH; };
+
+    real* getAllTurbinePosX(){ return turbinePosXH; };
+    real* getAllTurbinePosY(){ return turbinePosYH; };
+    real* getAllTurbinePosZ(){ return turbinePosZH; };
+
+    real getTurbineAzimuth(uint turbine){ return azimuthsH[turbine]; };
+    real getTurbineOmega  (uint turbine){ return omegasH[turbine];   };
+    real getTurbineYaw    (uint turbine){ return yawsH[turbine];     };
+
+    real getTurbinePosX(uint turbine){ return turbinePosXH[turbine]; };
+    real getTurbinePosY(uint turbine){ return turbinePosYH[turbine]; };
+    real getTurbinePosZ(uint turbine){ return turbinePosZH[turbine]; };
+    // Host-side arrays covering all turbines.
+    real* getAllBladeRadii(){ return this->bladeRadiiH; };
+    real* getAllBladeCoordsX(){ return this->bladeCoordsXH; };
+    real* getAllBladeCoordsY(){ return this->bladeCoordsYH; };
+    real* getAllBladeCoordsZ(){ return this->bladeCoordsZH; };
+    real* getAllBladeVelocitiesX(){ return this->bladeVelocitiesXH; };
+    real* getAllBladeVelocitiesY(){ return this->bladeVelocitiesYH; };
+    real* getAllBladeVelocitiesZ(){ return this->bladeVelocitiesZH; };
+    real* getAllBladeForcesX(){ return this->bladeForcesXH; };
+    real* getAllBladeForcesY(){ return this->bladeForcesYH; };
+    real* getAllBladeForcesZ(){ return this->bladeForcesZH; };
+    // Host-side arrays starting at the first entry that belongs to the given turbine.
+    real* getTurbineBladeRadii(uint turbine){ return &this->bladeRadiiH[turbine*numberOfBladeNodes]; }; // radii are stored once per turbine, not per blade
+    real* getTurbineBladeCoordsX(uint turbine){ return &this->bladeCoordsXH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeCoordsY(uint turbine){ return &this->bladeCoordsYH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeCoordsZ(uint turbine){ return &this->bladeCoordsZH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeVelocitiesX(uint turbine){ return &this->bladeVelocitiesXH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeVelocitiesY(uint turbine){ return &this->bladeVelocitiesYH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeVelocitiesZ(uint turbine){ return &this->bladeVelocitiesZH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeForcesX(uint turbine){ return &this->bladeForcesXH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeForcesY(uint turbine){ return &this->bladeForcesYH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeForcesZ(uint turbine){ return &this->bladeForcesZH[turbine*numberOfBladeNodes*numberOfBlades]; };
+    // Device-side arrays (the CurrentTimestep buffers) covering all turbines.
+    real* getAllBladeRadiiDevice(){ return this->bladeRadiiD; };
+    real* getAllBladeCoordsXDevice(){ return this->bladeCoordsXDCurrentTimestep; };
+    real* getAllBladeCoordsYDevice(){ return this->bladeCoordsYDCurrentTimestep; };
+    real* getAllBladeCoordsZDevice(){ return this->bladeCoordsZDCurrentTimestep; };
+    real* getAllBladeVelocitiesXDevice(){ return this->bladeVelocitiesXDCurrentTimestep; };
+    real* getAllBladeVelocitiesYDevice(){ return this->bladeVelocitiesYDCurrentTimestep; };
+    real* getAllBladeVelocitiesZDevice(){ return this->bladeVelocitiesZDCurrentTimestep; };
+    real* getAllBladeForcesXDevice(){ return this->bladeForcesXDCurrentTimestep; };
+    real* getAllBladeForcesYDevice(){ return this->bladeForcesYDCurrentTimestep; };
+    real* getAllBladeForcesZDevice(){ return this->bladeForcesZDCurrentTimestep; };
+    // Device-side arrays starting at the first entry that belongs to the given turbine.
+    real* getTurbineBladeRadiiDevice(uint turbine){ return &this->bladeRadiiD[turbine*numberOfBladeNodes]; };
+    real* getTurbineBladeCoordsXDevice(uint turbine){ return &this->bladeCoordsXDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeCoordsYDevice(uint turbine){ return &this->bladeCoordsYDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeCoordsZDevice(uint turbine){ return &this->bladeCoordsZDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeVelocitiesXDevice(uint turbine){ return &this->bladeVelocitiesXDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeVelocitiesYDevice(uint turbine){ return &this->bladeVelocitiesYDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeVelocitiesZDevice(uint turbine){ return &this->bladeVelocitiesZDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeForcesXDevice(uint turbine){ return &this->bladeForcesXDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeForcesYDevice(uint turbine){ return &this->bladeForcesYDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    real* getTurbineBladeForcesZDevice(uint turbine){ return &this->bladeForcesZDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; };
+    // All setters below write to the host-side arrays only.
+    void setAllAzimuths(real* _azimuth);
+    void setAllOmegas(real* _omegas);
+    void setAllYaws(real* yaws);
+
+    void setTurbineAzimuth(uint turbine, real azimuth){ azimuthsH[turbine] = azimuth; };
+    void setTurbineYaw(uint turbine, real yaw){ yawsH[turbine] = yaw; };
+    void setTurbineOmega(uint turbine, real omega){ omegasH[turbine] = omega; };
+
+    void setAllBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ);
+    void setAllBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ);
+    void setAllBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ);
+
+    void setTurbineBladeCoords(uint turbine, real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ);
+    void setTurbineBladeVelocities(uint turbine, real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ);
+    void setTurbineBladeForces(uint turbine, real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ);
+    // Blade force model called from interact(); the default implementation uses calcForcesEllipticWing().
+    virtual void calcBladeForces();
+
+private:
+    void initTurbineGeometries(CudaMemoryManager* cudaManager);
+    void initBoundingSpheres(Parameter* para, CudaMemoryManager* cudaManager);
+    void initBladeCoords(CudaMemoryManager* cudaManager);
+    void initBladeVelocities(CudaMemoryManager* cudaManager);
+    void initBladeForces(CudaMemoryManager* cudaManager);
+    void initBladeIndices(Parameter* para, CudaMemoryManager* cudaManager);
+
+    void calcForcesEllipticWing();
+    void rotateBlades(real angle, uint turbineID);
+
+    void writeBladeCoords(uint t);
+    void writeBladeForces(uint t);
+    void writeBladeVelocities(uint t);
+
+    void swapDeviceArrays();
+
+public: // raw arrays handled through CudaMemoryManager; suffix H = host copy, suffix D = device copy
+    real* bladeRadiiH;
+    real* bladeRadiiD;
+    real* bladeCoordsXH, * bladeCoordsYH, * bladeCoordsZH;
+    real* bladeCoordsXDPreviousTimestep, * bladeCoordsYDPreviousTimestep, * bladeCoordsZDPreviousTimestep;
+    real* bladeCoordsXDCurrentTimestep, * bladeCoordsYDCurrentTimestep, * bladeCoordsZDCurrentTimestep;
+    real* bladeVelocitiesXH, * bladeVelocitiesYH, * bladeVelocitiesZH;
+    real* bladeVelocitiesXDPreviousTimestep, * bladeVelocitiesYDPreviousTimestep, * bladeVelocitiesZDPreviousTimestep;
+    real* bladeVelocitiesXDCurrentTimestep, * bladeVelocitiesYDCurrentTimestep, * bladeVelocitiesZDCurrentTimestep;
+    real* bladeForcesXH, * bladeForcesYH, * bladeForcesZH;
+    real* bladeForcesXDPreviousTimestep, * bladeForcesYDPreviousTimestep, * bladeForcesZDPreviousTimestep;
+    real* bladeForcesXDCurrentTimestep, * bladeForcesYDCurrentTimestep, * bladeForcesZDCurrentTimestep;
+    uint* bladeIndicesH;
+    uint* bladeIndicesD; 
+    uint* boundingSphereIndicesH;
+    uint* boundingSphereIndicesD;
+    real* turbinePosXH, *turbinePosYH, *turbinePosZH, *omegasH, *azimuthsH, *yawsH, *diametersH;
+    real* turbinePosXD, *turbinePosYD, *turbinePosZD, *omegasD, *azimuthsD, *yawsD, *diametersD;
+
+private:
+    std::vector<real> preInitPosX, preInitPosY, preInitPosZ, preInitDiameters, preInitOmegas, preInitAzimuths, preInitYaws;
+    std::vector<std::vector<real>> preInitBladeRadii;
+    const bool useHostArrays;
+    const real density;
+    real deltaT, deltaX;
+    const uint numberOfBladeNodes, numberOfBlades;
+    uint numberOfTurbines;
+    const real epsilon; // in m
+    const int level;
+    uint numberOfIndices;
+    uint numberOfNodes;
+    real forceRatio, factorGaussian, invEpsilonSqrd, invDeltaX;
+    int streamIndex;
+};
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
deleted file mode 100644
index 71897bd21ea4fb299d3cc0ffa385506d4503f360..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
+++ /dev/null
@@ -1,423 +0,0 @@
-#include "ActuatorLine.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include <cuda/CudaGrid.h>
-#include "VirtualFluids_GPU/GPU/GeometryUtils.h"
-
-#include "Parameter/Parameter.h"
-#include "DataStructureInitializer/GridProvider.h"
-#include "GPU/CudaMemoryManager.h"
-
-__host__ __device__ __inline__ uint calcNode(uint bladeNode, uint nBladeNodes, uint blade, uint nBlades)
-{
-    return bladeNode+blade*nBladeNodes;
-}
-
-__host__ __device__ __inline__ void calcBladeAndBladeNode(uint node, uint& bladeNode, uint nBladeNodes, uint& blade, uint nBlades)
-{
-    blade = node/nBladeNodes;
-    bladeNode = node - blade*nBladeNodes;
-}
-
-__host__ __device__ __forceinline__ real distSqrd(real distX, real distY, real distZ)
-{
-    return distX*distX+distY*distY+distZ*distZ;
-}
-
-__host__ __device__ __inline__ void rotateFromBladeToGlobal(
-                            real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
-                            real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
-                            real& azimuth, real& yaw)
-{
-    real tmpX, tmpY, tmpZ;
-
-    rotateAboutX3D(azimuth, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, tmpX, tmpY, tmpZ);
-    rotateAboutZ3D(yaw, tmpX, tmpY, tmpZ, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF);
-
-}
-
-__host__ __device__ __inline__ void rotateFromGlobalToBlade(
-                            real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
-                            real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
-                            real& azimuth, real& yaw)
-{
-    real tmpX, tmpY, tmpZ;
-
-    invRotateAboutZ3D(yaw, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, tmpX, tmpY, tmpZ);
-    invRotateAboutX3D(azimuth, tmpX, tmpY, tmpZ, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF);
-}
-
-__global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, 
-                                      uint* neighborsX, uint* neighborsY, uint* neighborsZ, uint* neighborsWSB, 
-                                      real* vx, real* vy, real* vz, 
-                                      real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ,
-                                      real* bladeVelocitiesX, real* bladeVelocitiesY, real* bladeVelocitiesZ, 
-                                      uint nBlades, uint nBladeNodes, 
-                                      real azimuth, real yaw, real omega, 
-                                      real turbPosX, real turbPosY, real turbPosZ,
-                                      uint* bladeIndices, real velocityRatio, real invDeltaX)
-{
-    const uint x = threadIdx.x; 
-    const uint y = blockIdx.x;
-    const uint z = blockIdx.y;
-
-    const uint nx = blockDim.x;
-    const uint ny = gridDim.x;
-
-    const uint node = nx*(ny*z + y) + x;
-
-    uint bladeNode, blade;
-
-    calcBladeAndBladeNode(node, bladeNode, nBladeNodes, blade, nBlades);
-
-    if(node>=nBladeNodes*nBlades) return;
-
-    real bladeCoordX_BF = bladeCoordsX[node];
-    real bladeCoordY_BF = bladeCoordsY[node];
-    real bladeCoordZ_BF = bladeCoordsZ[node];
-
-    real bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF;
-
-    real localAzimuth = azimuth+blade*c2Pi/nBlades;
-
-    rotateFromBladeToGlobal(bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, 
-                            bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF,
-                            localAzimuth, yaw);
-
-    bladeCoordX_GF += turbPosX;
-    bladeCoordY_GF += turbPosY;
-    bladeCoordZ_GF += turbPosZ;
-
-    uint k, ke, kn, kt;
-    uint kne, kte, ktn, ktne;
-
-    k = findNearestCellBSW(bladeIndices[node], 
-                           gridCoordsX, gridCoordsY, gridCoordsZ, 
-                           bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, 
-                           neighborsX, neighborsY, neighborsZ, neighborsWSB);
-        
-    bladeIndices[node] = k;
-
-    getNeighborIndicesOfBSW(k, ke, kn, kt, kne, kte, ktn, ktne, neighborsX, neighborsY, neighborsZ);
-
-    real dW, dE, dN, dS, dT, dB;
-
-    real distX = invDeltaX*(bladeCoordX_GF-gridCoordsX[k]);
-    real distY = invDeltaX*(bladeCoordY_GF-gridCoordsY[k]);
-    real distZ = invDeltaX*(bladeCoordZ_GF-gridCoordsZ[k]);
-
-    getInterpolationWeights(dW, dE, dN, dS, dT, dB, distX, distY, distZ);
-
-    real bladeVelX_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx)*velocityRatio;
-    real bladeVelY_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy)*velocityRatio;
-    real bladeVelZ_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz)*velocityRatio;
-
-    real bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF;
-
-    rotateFromGlobalToBlade(bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF, 
-                            bladeVelX_GF, bladeVelY_GF, bladeVelZ_GF, 
-                            localAzimuth, yaw);
-
-    bladeVelocitiesX[node] = bladeVelX_BF;
-    bladeVelocitiesY[node] = bladeVelY_BF+omega*bladeCoordZ_BF;
-    bladeVelocitiesZ[node] = bladeVelZ_BF;
-}
-
-
-__global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ,
-                                real* gridForcesX, real* gridForcesY, real* gridForcesZ, 
-                                real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, 
-                                real* bladeForcesX, real* bladeForcesY,real* bladeForcesZ,
-                                uint nBlades, uint nBladeNodes,
-                                real azimuth, real yaw, real omega, 
-                                real turbPosX, real turbPosY, real turbPosZ,
-                                uint* gridIndices, uint nIndices, 
-                                real invEpsilonSqrd, real factorGaussian)
-{
-    const uint x = threadIdx.x; 
-    const uint y = blockIdx.x;
-    const uint z = blockIdx.y;
-
-    const uint nx = blockDim.x;
-    const uint ny = gridDim.x;
-
-    const uint index = nx*(ny*z + y) + x;
-
-    if(index>=nIndices) return;
-
-    uint gridIndex = gridIndices[index];
-
-    real gridCoordX_RF = gridCoordsX[gridIndex] - turbPosX;
-    real gridCoordY_RF = gridCoordsY[gridIndex] - turbPosY;
-    real gridCoordZ_RF = gridCoordsZ[gridIndex] - turbPosZ;
-
-    real gridForceX_RF = c0o1;
-    real gridForceY_RF = c0o1;
-    real gridForceZ_RF = c0o1;
-
-    real dAzimuth = c2Pi/nBlades;
-
-    for( uint blade=0; blade<nBlades; blade++)
-    { 
-        real localAzimuth = azimuth+blade*dAzimuth;
-
-        real gridCoordX_BF, gridCoordY_BF, gridCoordZ_BF;
-
-        rotateFromGlobalToBlade(gridCoordX_BF, gridCoordY_BF, gridCoordZ_BF,
-                                gridCoordX_RF, gridCoordY_RF, gridCoordZ_RF,
-                                localAzimuth, yaw);
-        
-        for( uint bladeNode=0; bladeNode<nBladeNodes; bladeNode++)
-        {
-            uint node = calcNode(bladeNode, nBladeNodes, blade, nBlades);
-
-            real eta = factorGaussian*exp(-distSqrd(bladeCoordsX[node]-gridCoordX_BF, bladeCoordsY[node]-gridCoordY_BF, bladeCoordsZ[node]-gridCoordZ_BF)*invEpsilonSqrd);
-            
-            real forceX_RF, forceY_RF, forceZ_RF;
-
-            rotateFromBladeToGlobal(bladeForcesX[node], bladeForcesY[node], bladeForcesZ[node], 
-                                    forceX_RF, forceY_RF, forceZ_RF, 
-                                    localAzimuth, yaw);
-            
-            gridForceX_RF += forceX_RF*eta;
-            gridForceY_RF += forceY_RF*eta;
-            gridForceZ_RF += forceZ_RF*eta;
-        }
-    }
-
-    atomicAdd(&gridForcesX[gridIndex], gridForceX_RF);
-    atomicAdd(&gridForcesY[gridIndex], gridForceY_RF);
-    atomicAdd(&gridForcesZ[gridIndex], gridForceZ_RF);
-}
-
-
-void ActuatorLine::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager)
-{
-    if(!para->getIsBodyForce()) throw std::runtime_error("try to allocate ActuatorLine but BodyForce is not set in Parameter.");
-    this->initBladeRadii(cudaMemoryManager);
-    this->initBladeCoords(cudaMemoryManager);    
-    this->initBladeIndices(para, cudaMemoryManager);
-    this->initBladeVelocities(cudaMemoryManager);
-    this->initBladeForces(cudaMemoryManager);    
-    this->initBoundingSphere(para, cudaMemoryManager);
-}
-
-
-void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, unsigned int t)
-{
-    if (level != this->level) return;
-
-    cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
-
-    vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->nNodes);
-
-    interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads >>>(
-        para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ,        
-        para->getParD(this->level)->neighborX, para->getParD(this->level)->neighborY, para->getParD(this->level)->neighborZ, para->getParD(this->level)->neighborInverse,
-        para->getParD(this->level)->velocityX, para->getParD(this->level)->velocityY, para->getParD(this->level)->velocityZ,
-        this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD,  
-        this->bladeVelocitiesXD, this->bladeVelocitiesYD, this->bladeVelocitiesZD,  
-        this->nBlades, this->nBladeNodes,
-        this->azimuth, this->yaw, this->omega, 
-        this->turbinePosX, this->turbinePosY, this->turbinePosZ,
-        this->bladeIndicesD, para->getVelocityRatio(), this->invDeltaX);
-
-    cudaMemoryManager->cudaCopyBladeVelocitiesDtoH(this);
-
-    this->calcBladeForces();
-
-    cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
-
-    vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->nIndices);
-
-    applyBodyForces<<<sphereGrid.grid, sphereGrid.threads>>>(
-        para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ,        
-        para->getParD(this->level)->forceX_SP, para->getParD(this->level)->forceY_SP, para->getParD(this->level)->forceZ_SP,        
-        this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD,  
-        this->bladeForcesXD, this->bladeForcesYD, this->bladeForcesZD,
-        this->nBlades, this->nBladeNodes,
-        this->azimuth, this->yaw, this->omega, 
-        this->turbinePosX, this->turbinePosY, this->turbinePosZ,
-        this->boundingSphereIndicesD, this->nIndices,
-        this->invEpsilonSqrd, this->factorGaussian);
-
-    this->azimuth = fmod(this->azimuth+this->omega*this->deltaT,c2Pi);
-}
-
-
-void ActuatorLine::free(Parameter* para, CudaMemoryManager* cudaMemoryManager)
-{
-    cudaMemoryManager->cudaFreeBladeRadii(this);
-    cudaMemoryManager->cudaFreeBladeCoords(this);
-    cudaMemoryManager->cudaFreeBladeVelocities(this);
-    cudaMemoryManager->cudaFreeBladeForces(this);
-    cudaMemoryManager->cudaFreeBladeIndices(this);
-    cudaMemoryManager->cudaFreeSphereIndices(this);
-}
-
-
-void ActuatorLine::calcForcesEllipticWing()
-{
-    uint node;
-    real u_rel, v_rel, u_rel_sq;
-    real phi;
-    real Cl = c1o1;
-    real Cd = c0o1;
-    real c0 = c1o1;
-
-    real c, Cn, Ct;
-
-    for( uint blade=0; blade<this->nBlades; blade++)
-    { 
-        for( uint bladeNode=0; bladeNode<this->nBladeNodes; bladeNode++)
-        {        
-            node = calcNode(bladeNode, this->nBladeNodes, blade, this->nBlades);
-
-            u_rel = this->bladeVelocitiesXH[node];
-            v_rel = this->bladeVelocitiesYH[node];
-            u_rel_sq = u_rel*u_rel+v_rel*v_rel;
-            phi = atan2(u_rel, v_rel);
-            
-            real tmp = c4o1*this->bladeRadiiH[bladeNode]/this->diameter-c1o1;
-            c = c0 * sqrt( c1o1- tmp*tmp );
-            Cn = Cl*cos(phi)+Cd*sin(phi);
-            Ct = Cl*sin(phi)-Cd*cos(phi);
-        
-            this->bladeForcesXH[node] = -c1o2*u_rel_sq*c*this->density*Cn;
-            this->bladeForcesYH[node] = -c1o2*u_rel_sq*c*this->density*Ct;
-            this->bladeForcesZH[node] = c0o1;
-        }
-    }
-}
-
-void ActuatorLine::calcBladeForces()
-{
-    this->calcForcesEllipticWing();
-}
-
-void ActuatorLine::initBladeRadii(CudaMemoryManager* cudaMemoryManager)
-{   
-    cudaMemoryManager->cudaAllocBladeRadii(this);
-
-    real dr = c1o2*this->diameter/this->nBladeNodes;  
-
-    for(uint node=0; node<this->nBladeNodes; node++)
-    {
-        this->bladeRadiiH[node] = dr*(node+1);
-    }
-    cudaMemoryManager->cudaCopyBladeRadiiHtoD(this);
-
-    real dxOPiSqrtEps = pow(this->deltaX/(this->epsilon*sqrt(cPi)),c3o1);
-    this->factorGaussian = dr*dxOPiSqrtEps/this->forceRatio;
-}
-
-void ActuatorLine::initBladeCoords(CudaMemoryManager* cudaMemoryManager)
-{   
-    cudaMemoryManager->cudaAllocBladeCoords(this);
-
-    for(uint blade=0; blade<this->nBlades; blade++)
-    {
-        for(uint bladeNode=0; bladeNode<this->nBladeNodes; bladeNode++)
-        {
-            uint node = calcNode(bladeNode, this->nBladeNodes, blade, this->nBlades);
-
-            this->bladeCoordsXH[node] = c0o1;
-            this->bladeCoordsYH[node] = c0o1;
-            this->bladeCoordsZH[node] = this->bladeRadiiH[bladeNode];
-        }
-    }
-    cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
-}
-
-void ActuatorLine::initBladeVelocities(CudaMemoryManager* cudaMemoryManager)
-{   
-    cudaMemoryManager->cudaAllocBladeVelocities(this);
-
-    for(uint node=0; node<this->nNodes; node++)
-    {
-        this->bladeVelocitiesXH[node] = c0o1;
-        this->bladeVelocitiesYH[node] = c0o1;
-        this->bladeVelocitiesZH[node] = c0o1;
-    }
-    cudaMemoryManager->cudaCopyBladeVelocitiesHtoD(this);
-}
-
-void ActuatorLine::initBladeForces(CudaMemoryManager* cudaMemoryManager)
-{   
-    cudaMemoryManager->cudaAllocBladeForces(this);
-
-    for(uint node=0; node<this->nNodes; node++)
-    {
-        this->bladeForcesXH[node] = c0o1;
-        this->bladeForcesYH[node] = c0o1;
-        this->bladeForcesZH[node] = c0o1;
-    }
-    cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
-}
-
-void ActuatorLine::initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemoryManager)
-{   
-    cudaMemoryManager->cudaAllocBladeIndices(this);
-
-    for(uint node=0; node<this->nNodes; node++)
-
-    {
-        this->bladeIndicesH[node] = 1;
-    }
-    cudaMemoryManager->cudaCopyBladeIndicesHtoD(this);
-}
-
-void ActuatorLine::initBoundingSphere(Parameter* para, CudaMemoryManager* cudaMemoryManager)
-{
-    // Actuator line exists only on 1 level
-    std::vector<int> nodesInSphere;
-    real sphereRadius = c1o2*this->diameter+c4o1*this->epsilon;
-    real sphereRadiusSqrd = sphereRadius*sphereRadius;
-
-    for (uint j = 1; j <= para->getParH(this->level)->numberOfNodes; j++)
-    {
-        const real distX = para->getParH(this->level)->coordinateX[j]-this->turbinePosX;
-        const real distY = para->getParH(this->level)->coordinateY[j]-this->turbinePosY;
-        const real distZ = para->getParH(this->level)->coordinateZ[j]-this->turbinePosZ;
-        if(distSqrd(distX,distY,distZ) < sphereRadiusSqrd) nodesInSphere.push_back(j);
-    }
-
-    this->nIndices = uint(nodesInSphere.size());
-    cudaMemoryManager->cudaAllocSphereIndices(this);
-    std::copy(nodesInSphere.begin(), nodesInSphere.end(), this->boundingSphereIndicesH);
-    cudaMemoryManager->cudaCopySphereIndicesHtoD(this);
-}
-
-void ActuatorLine::setBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
-{ 
-
-    for(uint node=0; node<this->nNodes; node++)
-    {
-        this->bladeCoordsXH[node] = _bladeCoordsX[node];
-        this->bladeCoordsYH[node] = _bladeCoordsY[node];
-        this->bladeCoordsZH[node] = _bladeCoordsZ[node];
-    }
-}
-
-void ActuatorLine::setBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ)
-{ 
-    for(uint node=0; node<this->nNodes; node++)
-    {
-        this->bladeVelocitiesXH[node] = _bladeVelocitiesX[node];
-        this->bladeVelocitiesYH[node] = _bladeVelocitiesY[node];
-        this->bladeVelocitiesZH[node] = _bladeVelocitiesZ[node];
-    }
-}
-
-void ActuatorLine::setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ)
-{ 
-    for(uint node=0; node<this->nNodes; node++)
-    {
-        this->bladeForcesXH[node] = _bladeForcesX[node];
-        this->bladeForcesYH[node] = _bladeForcesY[node];
-        this->bladeForcesZH[node] = _bladeForcesZ[node];
-    }
-}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
deleted file mode 100644
index b44c89c5020eb206baa3bba1994b1e45f760c3bb..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
+++ /dev/null
@@ -1,120 +0,0 @@
-#ifndef ActuatorLine_H
-#define ActuatorLine_H
-
-#include "PreCollisionInteractor.h"
-#include "PointerDefinitions.h"
-#include "VirtualFluids_GPU_export.h"
-#include "lbm/constants/NumericConstants.h"
-
-class Parameter;
-class GridProvider;
-
-using namespace vf::lbm::constant;
-class VIRTUALFLUIDS_GPU_EXPORT ActuatorLine : public PreCollisionInteractor
-{
-public:
-    ActuatorLine(
-        const uint _nBlades,
-        const real _density,
-        const uint _nBladeNodes,
-        const real _epsilon,
-        real _turbinePosX, real _turbinePosY, real _turbinePosZ,
-        const real _diameter,
-        int _level,
-        const real _deltaT,
-        const real _deltaX
-    ) : nBlades(_nBlades),
-        density(_density),
-        nBladeNodes(_nBladeNodes), 
-        epsilon(_epsilon),
-        turbinePosX(_turbinePosX), turbinePosY(_turbinePosY), turbinePosZ(_turbinePosZ),
-        diameter(_diameter),
-        level(_level),
-        PreCollisionInteractor()
-    {
-        this->deltaT = _deltaT*exp2(-this->level);
-        this->deltaX = _deltaX*exp2(-this->level);
-        this->invDeltaX = c1o1/this->deltaX;
-        this->forceRatio = this->density*pow(this->deltaX,4)*pow(this->deltaT,-2);
-        this->invEpsilonSqrd = c1o1/(this->epsilon*this->epsilon);
-        this->nNodes = this->nBladeNodes*this->nBlades;
-        this->omega = c1o1;
-        this->azimuth = c0o1;
-        this->yaw = c0o1;
-    };
-
-    virtual ~ActuatorLine(){};
-
-    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) override;
-    void interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, uint t) override;
-    void free(Parameter* para, CudaMemoryManager* cudaMemoryManager) override;
-    void write(uint t);
-
-    uint getNBladeNodes(){ return this->nBladeNodes; };
-    uint getNBlades(){ return this->nBlades;};
-    uint getNIndices(){ return this->nIndices; };
-    uint getNNodes(){ return this->nNodes; };
-    real getOmega(){ return this->omega; };
-    real getAzimuth(){ return this->azimuth; };
-    real getYaw(){ return this->yaw; };
-    real getDensity(){ return this->density; };
-    real getPositionX(){ return this->turbinePosX; };
-    real getPositionY(){ return this->turbinePosY; };
-    real getPositionZ(){ return this->turbinePosZ; };
-    real* getBladeRadii(){ return this->bladeRadiiH; };
-    real* getBladeCoordsX(){ return this->bladeCoordsXH; };
-    real* getBladeCoordsY(){ return this->bladeCoordsYH; };
-    real* getBladeCoordsZ(){ return this->bladeCoordsZH; };
-    real* getBladeVelocitiesX(){ return this->bladeVelocitiesXH; };
-    real* getBladeVelocitiesY(){ return this->bladeVelocitiesYH; };
-    real* getBladeVelocitiesZ(){ return this->bladeVelocitiesZH; };
-    real* getBladeForcesX(){ return this->bladeForcesXH; };
-    real* getBladeForcesY(){ return this->bladeForcesYH; };
-    real* getBladeForcesZ(){ return this->bladeForcesZH; };
-
-    void setOmega(real _omega){ this->omega = _omega; };
-    void setAzimuth(real _azimuth){ this->azimuth = _azimuth; };
-    void setYaw(real _yaw){ this->yaw = _yaw; };
-    void setBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ);
-    void setBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ);
-    void setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ);
-    virtual void calcBladeForces();
-
-private:
-    void initBoundingSphere(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-
-    void initBladeRadii(CudaMemoryManager* cudaMemoryManager);
-    void initBladeCoords(CudaMemoryManager* cudaMemoryManager);
-    void initBladeVelocities(CudaMemoryManager* cudaMemoryManager);
-    void initBladeForces(CudaMemoryManager* cudaMemoryManager);
-    void initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-
-    void calcForcesEllipticWing();
-
-public:
-    real* bladeRadiiH;
-    real* bladeRadiiD;
-    real* bladeCoordsXH, * bladeCoordsYH, * bladeCoordsZH;
-    real* bladeCoordsXD, * bladeCoordsYD, * bladeCoordsZD;
-    real* bladeVelocitiesXH, * bladeVelocitiesYH, * bladeVelocitiesZH;
-    real* bladeVelocitiesXD, * bladeVelocitiesYD, * bladeVelocitiesZD;
-    real* bladeForcesXH, * bladeForcesYH, * bladeForcesZH;
-    real* bladeForcesXD, * bladeForcesYD, * bladeForcesZD;
-    uint* bladeIndicesH;
-    uint* bladeIndicesD; 
-    uint* boundingSphereIndicesH;
-    uint* boundingSphereIndicesD;
-    
-private:
-    const real density;
-    real turbinePosX, turbinePosY, turbinePosZ;
-    real omega, azimuth, yaw, deltaT, deltaX, invDeltaX, forceRatio, factorGaussian, invEpsilonSqrd;
-    const real diameter;
-    const uint nBladeNodes;
-    const uint nBlades;
-    const real epsilon; // in m
-    const int level;
-    uint nIndices, nNodes;
-};
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
index a9b233f3035890c2617d3a00b639f995be6c218f..f9a87f613e7607301e59a7c1e67eb556418892e4 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
@@ -33,6 +33,7 @@ public:
     virtual void init(Parameter *para, GridProvider *gridProvider, CudaMemoryManager *cudaMemoryManager) = 0;
     virtual void interact(Parameter *para, CudaMemoryManager *cudaMemoryManager, int level, uint t) = 0;
     virtual void free(Parameter *para, CudaMemoryManager *cudaMemoryManager) = 0;
+    virtual void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) = 0;
 
 protected:
     uint updateInterval;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
new file mode 100644
index 0000000000000000000000000000000000000000..1a8260ef936e2707fb38fbbba71cdbfac692f350
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
@@ -0,0 +1,359 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PrecursorWriter.cu
+//! \ingroup PreCollisionInteractor
+//! \author Henrik Asmuth, Henry Korb
+//======================================================================================
+#include "PrecursorWriter.h"
+#include "basics/writer/WbWriterVtkXmlImageBinary.h"
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+#include "cuda/CudaGrid.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+
+#include "Core/StringUtilities/StringUtil.h"
+
+#include "Parameter/Parameter.h"
+#include "DataStructureInitializer/GridProvider.h"
+#include "GPU/CudaMemoryManager.h"
+
+using namespace vf::lbm::dir;
+using namespace vf::gpu;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//TODO check everything for multiple levels
+void index1d(int& idx, int y, int z, int ny, int nz)
+{
+    idx = z*ny + y; // flatten (y, z) with y as the fastest-running index; nz is not used
+}
+
+void index2d(int idx, int& y, int& z, int ny, int nz)
+{
+    z = idx/ny; // slower-running index; nz is not used
+    y = idx%ny; // remainder within one row of ny entries
+}
+
+__inline__ __host__ __device__ uint linearIdx(const uint component, const uint node, const uint timestep, const uint numberOfComponents, const uint numberOfNodes)
+{
+    return (timestep*numberOfComponents+component)*numberOfNodes+node; // node runs fastest, then component, then timestep
+}
+
+__inline__ __host__ __device__ uint linearIdx(const uint component, const uint node, const uint numberOfNodes)
+{
+    return component*numberOfNodes+node; // entries of one component are contiguous, node runs fastest
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//! \brief Writes the velocity of each precursor node, multiplied by velocityRatio, into precursorData.
+//! The three components are stored one after another via linearIdx(component, node, numberOfPrecursorNodes).
+__global__ void fillArrayVelocities(const uint numberOfPrecursorNodes,
+                                    uint* indices,
+                                    real *precursorData,
+                                    real *vx,
+                                    real *vy,
+                                    real *vz,
+                                    real velocityRatio)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index from threadIdx, blockIdx, blockDim and gridDim; threads beyond the last precursor node exit.
+    //!
+    const unsigned precursorNode = vf::gpu::getNodeIndex();
+    if(precursorNode>=numberOfPrecursorNodes) return;
+
+    const uint gridNode = indices[precursorNode]; // entry of vx/vy/vz that belongs to this precursor node
+    precursorData[linearIdx(0u, precursorNode, numberOfPrecursorNodes)] = vx[gridNode]*velocityRatio;
+    precursorData[linearIdx(1u, precursorNode, numberOfPrecursorNodes)] = vy[gridNode]*velocityRatio;
+    precursorData[linearIdx(2u, precursorNode, numberOfPrecursorNodes)] = vz[gridNode]*velocityRatio;
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//! \brief Copies nine distributions per precursor node (the DIR_P.. entries of dist.f) into precursorData.
+//! Values are stored per component via linearIdx(component, node, numberOfPrecursorNodes).
+__global__ void fillArrayDistributions( uint numberOfPrecursorNodes,
+                                        uint* indices,
+                                        real* precursorData,
+                                        real* distributions,
+                                        uint* neighborX, uint* neighborY, uint* neighborZ,
+                                        bool isEvenTimestep,
+                                        unsigned long numberOfLBnodes)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index from threadIdx, blockIdx, blockDim and gridDim; threads beyond the last precursor node exit.
+    //!
+    const unsigned precursorNode = vf::gpu::getNodeIndex();
+    if(precursorNode>=numberOfPrecursorNodes) return;
+
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Set neighbor indices (necessary for indirect addressing). neighborX is not read by this kernel.
+    //!
+    const uint k_000 = indices[precursorNode];
+    const uint k_0M0 = neighborY[k_000];
+    const uint k_00M = neighborZ[k_000];
+    const uint k_0MM = neighborZ[k_0M0];
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Distributions read at the node itself
+    precursorData[linearIdx(PrecP00, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_P00])[k_000];
+    precursorData[linearIdx(PrecPP0, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_PP0])[k_000];
+    precursorData[linearIdx(PrecP0P, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_P0P])[k_000];
+    precursorData[linearIdx(PrecPPP, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_PPP])[k_000];
+    //! - Distributions read at the neighborY node
+    precursorData[linearIdx(PrecPM0, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_PM0])[k_0M0];
+    precursorData[linearIdx(PrecPMP, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_PMP])[k_0M0];
+    //! - Distributions read at the neighborZ node
+    precursorData[linearIdx(PrecP0M, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_P0M])[k_00M];
+    precursorData[linearIdx(PrecPPM, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_PPM])[k_00M];
+    //! - Distribution read at the neighborZ node of the neighborY node
+    precursorData[linearIdx(PrecPMM, precursorNode, numberOfPrecursorNodes)] = (dist.f[DIR_PMM])[k_0MM];
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager)
+{
+    VF_LOG_INFO("PrecursorWriter: Start initializing...");
+    VF_LOG_INFO("Writing yz-planes at x={}m every {}. timestep, starting at t={}", this->xPos, this->tSave, this->tStartOut);
+    // Per grid level: select the fluid nodes of the plane, map them onto a regular ny x nz image and set up the PrecursorStruct.
+    precursorStructs.resize(para->getMaxLevel()+1);
+    for(int level=0; level<=para->getMaxLevel(); level++)
+    {
+        // Grid spacing of this level: distance in x between node 1 and its x-neighbor.
+        real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]);
+        int maxPoints = (int((yMax-yMin)/dx)+1)* (int((zMax-zMin)/dx)+1); // NOTE(review): not used in the rest of this function
+
+        real lowestY, lowestZ, highestY, highestZ;
+        // Seed the bounds: lower bounds from the last node of the level, upper bounds from node 1.
+        lowestY = para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes-1];
+        highestY = para->getParH(level)->coordinateY[1];        
+        
+        lowestZ = para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes-1];
+        highestZ = para->getParH(level)->coordinateZ[1];
+        // NOTE(review): min/max below only move the bounds outwards, so a seed value survives unless a selected node lies beyond it - confirm the node ordering guarantees this.
+        std::vector<uint> indicesOnGrid;    // node indices of the selected points on this level's grid
+        std::vector<int> indicesOnPlane;    // flat index of each selected point within the ny x nz plane
+        std::vector<real> coordY, coordZ;
+        // Select fluid nodes with x in [xPos, xPos+dx) that lie inside the [yMin,yMax] x [zMin,zMax] window; node index 0 is skipped.
+        for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
+        {
+            real pointCoordX = para->getParH(level)->coordinateX[pos];
+            real pointCoordY = para->getParH(level)->coordinateY[pos];
+            real pointCoordZ = para->getParH(level)->coordinateZ[pos];
+            if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID &&
+                pointCoordX < (dx+xPos) && pointCoordX >= xPos       &&
+                pointCoordY<=yMax && pointCoordY>=yMin               && 
+                pointCoordZ<=zMax && pointCoordZ>=zMin)
+            {
+                highestY = max(highestY, pointCoordY);
+                highestZ = max(highestZ, pointCoordZ);
+
+                lowestY = min(lowestY, pointCoordY);
+                lowestZ = min(lowestZ, pointCoordZ);
+                indicesOnGrid.push_back((uint)pos);    
+                coordY.push_back(pointCoordY);            
+                coordZ.push_back(pointCoordZ);    
+            }
+        }
+        if(indicesOnGrid.size()==0)
+            throw std::runtime_error("PrecursorWriter did not find any points on the grid");
+        // Number of image points per direction spanned by the bounds found above.
+        int ny = int((highestY-lowestY)/dx)+1;
+        int nz = int((highestZ-lowestZ)/dx)+1;
+        // Turn each node's (y,z) offset from the lower corner into integer plane coordinates and flatten them with index1d.
+        for(uint i=0;i<indicesOnGrid.size(); i++)
+        {
+                int idxY = int((coordY[i]-lowestY)/dx);
+                int idxZ = int((coordZ[i]-lowestZ)/dx);
+                int idx;
+                index1d(idx, idxY, idxZ, ny, nz);
+                indicesOnPlane.push_back(idx);
+        }
+        // Per-level metadata; the third spacing entry is presumably the physical time between two saved planes - TODO confirm units of getTimeRatio().
+        precursorStructs[level] = SPtr<PrecursorStruct>(new PrecursorStruct);
+        precursorStructs[level]->numberOfPointsInBC = (uint)indicesOnGrid.size();
+        precursorStructs[level]->indicesOnPlane = (int*) malloc(precursorStructs[level]->numberOfPointsInBC*sizeof(int));
+        precursorStructs[level]->spacing = makeUbTuple(dx, dx, tSave*para->getTimeRatio()*pow(2,-level));
+        precursorStructs[level]->origin = makeUbTuple(lowestY, lowestZ);
+        precursorStructs[level]->extent = makeUbTuple(0, ny-1, 0, nz-1);
+        precursorStructs[level]->numberOfPointsInData = ny*nz;
+        precursorStructs[level]->numberOfTimestepsPerFile = min(para->getlimitOfNodesForVTK()/(ny*nz), maxtimestepsPerFile);
+        precursorStructs[level]->numberOfFilesWritten = 0;
+        precursorStructs[level]->numberOfTimestepsBuffered = 0;
+        
+        switch (outputVariable)
+        {
+        case OutputVariable::Velocities:
+            precursorStructs[level]->numberOfQuantities = 3;
+            break;
+        case OutputVariable::Distributions:
+            precursorStructs[level]->numberOfQuantities = 9;
+            break;
+        
+        default:
+            break;
+        }
+        // indicesH is written right after this call, so it is presumably allocated here - confirm in CudaMemoryManager.
+        cudaManager->cudaAllocPrecursorWriter(this, level);
+    
+        std::copy(indicesOnGrid.begin(), indicesOnGrid.end(), precursorStructs[level]->indicesH);
+        std::copy(indicesOnPlane.begin(), indicesOnPlane.end(), precursorStructs[level]->indicesOnPlane);
+
+        cudaManager->cudaCopyPrecursorWriterIndicesHtoD(this, level);
+
+        VF_LOG_INFO("Found {} points in precursor plane on level {}", precursorStructs[level]->numberOfPointsInBC, level);
+    }
+    VF_LOG_INFO("PrecursorWriter: Done initializing.");
+}
+
+
+void PrecursorWriter::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t)
+{
+    // Sample on this level only after its start time and then on every tSave-th level timestep.
+    uint t_level         = para->getTimeStep(level, t, true);
+    uint tStartOut_level = tStartOut*pow(2, level);
+
+    if(t_level>tStartOut_level && ((t_level-tStartOut_level) % tSave)==0)
+    {
+        vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, precursorStructs[level]->numberOfPointsInBC);
+        // Launch the kernel for the configured output variable; it is handed the plane's indicesD and bufferD.
+        if(this->outputVariable==OutputVariable::Velocities)
+        {
+            fillArrayVelocities<<<grid.grid, grid.threads>>>(   precursorStructs[level]->numberOfPointsInBC, precursorStructs[level]->indicesD, 
+                                                                precursorStructs[level]->bufferD, 
+                                                                para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                para->getVelocityRatio());
+            getLastCudaError("In PrecursorWriter::interact fillArrayVelocities execution failed");
+        }
+        else if(this->outputVariable==OutputVariable::Distributions)
+        {
+            fillArrayDistributions<<<grid.grid, grid.threads>>>(precursorStructs[level]->numberOfPointsInBC, precursorStructs[level]->indicesD, 
+                                                                precursorStructs[level]->bufferD,
+                                                                para->getParD(level)->distributions.f[0],
+                                                                para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                para->getEvenOrOdd(level), para->getParD(level)->numberOfNodes);
+            getLastCudaError("In PrecursorWriter::interact fillArrayDistributions execution failed");
+        }
+        cudaManager->cudaCopyPrecursorWriterOutputVariablesDtoH(this, level);
+
+        // switch device buffer and data pointer so precursor data is gathered in buffer and copied from bufferD to bufferH
+        real *tmp = precursorStructs[level]->bufferD;
+        precursorStructs[level]->bufferD = precursorStructs[level]->dataD;
+        precursorStructs[level]->dataD = tmp;
+
+        precursorStructs[level]->numberOfTimestepsBuffered++;
+        // Flush to disk once a file's worth of timesteps is buffered or the final timestep is reached.
+        if(precursorStructs[level]->numberOfTimestepsBuffered >= precursorStructs[level]->numberOfTimestepsPerFile || t == para->getTimestepEnd())
+        {
+            // A still-running asynchronous write() dereferences dataH, so it has to finish BEFORE the host pointers are swapped.
+            writeFuture.wait();
+            // switch host buffer and data pointer so precursor data is copied in buffer and written from data
+            tmp = precursorStructs[level]->bufferH;
+            precursorStructs[level]->bufferH = precursorStructs[level]->dataH;
+            precursorStructs[level]->dataH = tmp;
+
+            writeFuture = std::async(std::launch::async, [this](Parameter* para, uint level, uint timesteps){ this->write(para, level, timesteps); }, para, level, precursorStructs[level]->numberOfTimestepsBuffered);
+            precursorStructs[level]->numberOfTimestepsBuffered = 0;
+        }
+    }
+}
+
+
+void PrecursorWriter::free(Parameter* para, CudaMemoryManager* cudaManager)
+{
+    writeFuture.wait();   // no asynchronous write may still be running when the buffers are handed back
+    for(int level=0; level<=para->getMaxLevel(); level++)
+    {
+        // Write whatever is still buffered on this level, then pass the level to cudaFreePrecursorWriter.
+        const uint pendingTimesteps = getPrecursorStruct(level)->numberOfTimestepsBuffered;
+        if(pendingTimesteps>0) write(para, level, pendingTimesteps);
+        cudaManager->cudaFreePrecursorWriter(this, level);
+    }
+}
+
+
+void PrecursorWriter::write(Parameter* para, int level, uint numberOfTimestepsBuffered)
+{
+    std::string fname = this->makeFileName(fileName, level, para->getMyProcessID(), precursorStructs[level]->numberOfFilesWritten) + getWriter()->getFileExtension();
+    std::string wholeName = outputPath + "/" + fname;
+    // The file name encodes level, process id and the running file number of this level.
+    uint numberOfPointsInData = precursorStructs[level]->numberOfPointsInData;
+    // Index of the first timestep in this file; assumes every earlier file held numberOfTimestepsPerFile timesteps.
+    int startTime = precursorStructs[level]->numberOfFilesWritten*precursorStructs[level]->numberOfTimestepsPerFile;
+    // The written image is 3D: the two in-plane extents plus the buffered timesteps along the third axis.
+    UbTupleInt6 extent = makeUbTuple(   val<1>(precursorStructs[level]->extent),    val<2>(precursorStructs[level]->extent), 
+                                        val<3>(precursorStructs[level]->extent),    val<4>(precursorStructs[level]->extent), 
+                                        startTime,                          startTime+(int)numberOfTimestepsBuffered-1);
+
+    UbTupleFloat3 origin = makeUbTuple( val<1>(precursorStructs[level]->origin), val<2>(precursorStructs[level]->origin), 0.f);
+    // One array per quantity; image points that have no selected node keep the NaN fill value.
+    std::vector<std::vector<double>> nodedata;
+    
+    for(uint quant=0; quant<precursorStructs[level]->numberOfQuantities; quant++)
+    {
+        std::vector<double> doubleArr(numberOfPointsInData*numberOfTimestepsBuffered, NAN);
+        for( uint timestep=0; timestep<numberOfTimestepsBuffered; timestep++)
+        {
+            for (uint pos=0; pos < precursorStructs[level]->numberOfPointsInBC; pos++)
+            {
+                int indexOnPlane = precursorStructs[level]->indicesOnPlane[pos]+timestep*numberOfPointsInData;
+                doubleArr[indexOnPlane] = double(precursorStructs[level]->dataH[linearIdx(quant, pos, timestep, precursorStructs[level]->numberOfQuantities, precursorStructs[level]->numberOfPointsInBC)]);
+            }
+        }
+        nodedata.push_back(doubleArr);
+    }
+    // No cell data is written; an empty list is passed to the writer.
+    std::vector<std::vector<double>> celldata;
+    getWriter()->writeData(wholeName, nodedatanames, celldatanames, nodedata, celldata, extent, origin, precursorStructs[level]->spacing, extent, this->writePrecision);
+    precursorStructs[level]->numberOfFilesWritten++;
+}
+
+std::string PrecursorWriter::makeFileName(std::string fileName, int level, int id, uint numberOfFilesWritten)
+{   // Result: <fileName>_lev_<level>_ID_<id>_File_<numberOfFilesWritten>; no file extension is added here.
+    fileName.append("_lev_").append(StringUtil::toString<int>(level));
+    fileName.append("_ID_").append(StringUtil::toString<int>(id));
+    return fileName.append("_File_").append(StringUtil::toString<int>(numberOfFilesWritten));
+}
+
+void PrecursorWriter::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider)
+{
+    for(uint level=0; level<=(uint)para->getMaxLevel(); level++)   // inclusive bound: the finest level is tagged too, as in init() and free()
+    {
+        if(outputVariable==OutputVariable::Velocities)   // interact() reads velocityX/Y/Z only for velocity output, so only then are nodes tagged
+        {
+            std::vector<uint> indices(precursorStructs[level]->indicesH, precursorStructs[level]->indicesH+precursorStructs[level]->numberOfPointsInBC);
+            gridProvider->tagFluidNodeIndices(indices, CollisionTemplate::WriteMacroVars, level);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..264023b58ba6db46b50f6a85b334c530864a0b8f
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
@@ -0,0 +1,161 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PrecursorWriter.h
+//! \author Henry Korb, Henrik Asmuth
+//! \date 05/12/2022
+//! \brief Probe writing planes of data to be used as inflow data in successor simulation using PrecursorBC
+//!
+//! The probe writes out yz-planes at a specific x position (\p xPos) of either velocities or distributions
+//! that can be read by PrecursorBC as inflow data.
+//=======================================================================================
+
+
+#ifndef PRECURSORPROBE_H_
+#define PRECURSORPROBE_H_
+
+#include "PreCollisionInteractor.h"
+#include "WbWriterVtkXmlImageBinary.h"
+#include "LBM/LB.h"
+#include <string>
+#include <vector>
+#include <future>
+#include "PointerDefinitions.h"
+#include "Logger.h"
+
+class Parameter;
+class CudaMemoryManager;
+class GridProvider;
+
+enum class OutputVariable {
+    //! - Velocities: three quantities per point, written as "vx", "vy", "vz"
+    Velocities,
+    //! - Distributions: nine quantities per point, written as "fP00" ... "fPMM"
+    Distributions    
+};
+
+static constexpr uint PrecP00 = 0; // quantity indices of the nine written distributions; same order as the node data names
+static constexpr uint PrecPP0 = 1; // "fPP0"
+static constexpr uint PrecPM0 = 2; // "fPM0"
+static constexpr uint PrecP0P = 3; // "fP0P"
+static constexpr uint PrecP0M = 4; // "fP0M"
+static constexpr uint PrecPPP = 5; // "fPPP"
+static constexpr uint PrecPMP = 6; // "fPMP"
+static constexpr uint PrecPPM = 7; // "fPPM"
+static constexpr uint PrecPMM = 8; // "fPMM"
+
+struct PrecursorStruct // per-level state of a PrecursorWriter
+{
+    uint numberOfPointsInBC, numberOfPointsInData, numberOfTimestepsPerFile, numberOfFilesWritten, numberOfTimestepsBuffered; // selected nodes | ny*nz image points | flush threshold | files so far | samples since last flush
+    uint *indicesH, *indicesD; // grid node indices of the selected points (H/D: presumably host/device copies)
+    real *dataH, *dataD;       // arrays handed to the writer side; swapped with the buffer arrays in interact()
+    real *bufferH, *bufferD;   // arrays currently being filled
+    uint numberOfQuantities;   // 3 for velocities, 9 for distributions
+    UbTupleInt4 extent;        // (0, ny-1, 0, nz-1)
+    UbTupleFloat2 origin;      // (lowestY, lowestZ) of the selected points
+    UbTupleFloat3 spacing;     // (dx, dx, time increment between saved planes)
+    int* indicesOnPlane;       // malloc'ed in init(): flat image index of every selected point
+    cudaStream_t stream;       // NOTE(review): not referenced in PrecursorWriter.cu as far as visible - confirm usage in CudaMemoryManager
+};
+
+class PrecursorWriter : public PreCollisionInteractor // writes yz-planes of velocities or distributions to VTK image files
+{
+public:
+    PrecursorWriter(
+        const std::string _fileName,            // prefix of every written file name
+        const std::string _outputPath,          // directory the files are written to
+        real _xPos,                             // x position of the plane
+        real _yMin, real _yMax,                 // y window of the plane
+        real _zMin, real _zMax,                 // z window of the plane
+        uint _tStartOut,                        // sampling starts after this timestep
+        uint _tSave,                            // a plane is sampled every _tSave timesteps
+        OutputVariable _outputVariable,         // velocities or distributions
+        uint _maxTimestepsPerFile=uint(1e4)     // upper bound for the number of timesteps stored in one file
+    ): 
+    fileName(_fileName), 
+    outputPath(_outputPath), 
+    xPos(_xPos),
+    yMin(_yMin),
+    yMax(_yMax),
+    zMin(_zMin),
+    zMax(_zMax),
+    tStartOut(_tStartOut), 
+    tSave(_tSave),
+    outputVariable(_outputVariable),
+    maxtimestepsPerFile(_maxTimestepsPerFile)
+    {
+        nodedatanames = determineNodeDataNames();
+        writeFuture = std::async([](){}); // gives writeFuture a valid state so the first writeFuture.wait() is well-defined
+    };
+    // PreCollisionInteractor interface
+    void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override;
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
+    void free(Parameter* para, CudaMemoryManager* cudaManager) override;
+    void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override;
+
+    OutputVariable getOutputVariable(){ return this->outputVariable; }
+    // Per-level state; level is used as index without a range check.
+    SPtr<PrecursorStruct> getPrecursorStruct(int level){return precursorStructs[level];}
+    static std::string makeFileName(std::string fileName, int level, int id, uint part);
+    // Precision value that is forwarded to the VTK writer in write().
+    void setWritePrecision(uint _writePrecision){ this->writePrecision=_writePrecision;}
+    
+private:
+    WbWriterVtkXmlImageBinary* getWriter(){ return WbWriterVtkXmlImageBinary::getInstance(); };
+    void write(Parameter* para, int level, uint numberOfTimestepsBuffered);
+    // Names of the node data arrays for the configured output variable; throws std::runtime_error for an unknown value.
+    std::vector<std::string> determineNodeDataNames()
+    {
+        switch (outputVariable)
+        {
+        case OutputVariable::Velocities:
+            return {"vx", "vy", "vz"};
+            break;       
+        case OutputVariable::Distributions:
+            return {"fP00", "fPP0", "fPM0", "fP0P", "fP0M", "fPPP", "fPMP", "fPPM", "fPMM"};
+            break;
+        
+        default:
+            throw std::runtime_error("Invalid OutputVariable for PrecursorWriter");
+            break;
+        }
+    }
+
+private:
+    std::vector<SPtr<PrecursorStruct>> precursorStructs; // one entry per grid level, created in init()
+    std::string fileName, outputPath;
+    std::vector<std::string> nodedatanames;
+    std::vector<std::string> celldatanames; // never filled in this header; passed to the writer as is
+    uint tStartOut, tSave, maxtimestepsPerFile;
+    real xPos, yMin, yMax, zMin, zMax;
+    OutputVariable outputVariable;
+    std::future<void> writeFuture; // handle of the most recent asynchronous write()
+    uint writePrecision = 8;
+};
+
+#endif //PRECURSORPROBE_H_
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
index f5b520acfad74f6787e9e657fce3ccdceed9d539..e89d392b5d4bf5983f9bb47642fef81d0f06cc89 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
@@ -15,6 +15,7 @@
 #include "Parameter/Parameter.h"
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
+#include "GPU/GPU_Interface.h"
 
 #include <algorithm>
 
@@ -235,7 +236,7 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider,
                                 }
 
     // Find all points along the normal direction
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t j = 1; j < para->getParH(level)->numberOfNodes; j++ )
     {
         if(para->getParH(level)->typeOfGridNode[j] == GEO_FLUID)
         {   
@@ -250,16 +251,16 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider,
     std::sort(pointCoordsNormal->begin(), pointCoordsNormal->end());
     
     // Find all pointCoords in the first plane 
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
     {
-        if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID && pointCoordsNormal_par[j] == pointCoordsNormal->at(0)) 
+        if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID && pointCoordsNormal_par[pos] == pointCoordsNormal->at(0)) 
         {
             //not needed in current state, might become relevant for two-point correlations
             // pointCoordsNormal->push_back( pointCoordsNormal_par[j] ); 
             // pointCoordsInplane1->push_back( pointCoordsInplane1_par[j] );
             // pointCoordsInplane2->push_back( pointCoordsInplane2_par[j] );
 
-            probeIndices_level.push_back(j);
+            probeIndices_level.push_back((int)pos);
         }
     }
 }
@@ -268,6 +269,23 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider,
 
 void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t_level, int level)
 {   
+    // Compute macroscopic variables in entire domain
+    CalcMacCompSP27(
+        para->getParD(level)->velocityX, 
+        para->getParD(level)->velocityY, 
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->rho, 
+        para->getParD(level)->pressure, 
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX, 
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ, 
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->numberofthreads, 
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("In PlanarAverageProbe Kernel CalcMacSP27 execution failed");
+
     // Definition of normal and inplane directions for moveIndices kernels
     uint *neighborNormal, *neighborInplane1, *neighborInplane2;
     if( this->planeNormal == 'x' )
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
index d11f8e76e4d13113b201af5494b7d0cfcfe18353..3d3533f74501e776f9150c83c9d9101a0be7ecbc 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
@@ -74,6 +74,7 @@ public:
         planeNormal(_planeNormal)
 
     {   
+        if (_tStartTmpAvg<_tStartAvg)   throw std::runtime_error("Probe: tStartTmpAvg must be larger than tStartAvg!");
         if(!(_planeNormal == 'x' || _planeNormal == 'y' || _planeNormal == 'z')) 
             throw std::runtime_error("PlanarAverageProbe: planeNormal must be 'x', 'y' or 'z'!");
     }
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
index 7d1c0205219737e4b28acbb1a893a0a6071ae9de..f55045505bff0e3b5b0b1426be4e9e1a3832d088 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
@@ -76,11 +76,11 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
                             int level)
 {
     real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]);
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
     {
-        real pointCoordX = para->getParH(level)->coordinateX[j];
-        real pointCoordY = para->getParH(level)->coordinateY[j];
-        real pointCoordZ = para->getParH(level)->coordinateZ[j];
+        real pointCoordX = para->getParH(level)->coordinateX[pos];
+        real pointCoordY = para->getParH(level)->coordinateY[pos];
+        real pointCoordZ = para->getParH(level)->coordinateZ[pos];
         real distX = pointCoordX - this->posX;
         real distY = pointCoordY - this->posY;
         real distZ = pointCoordZ - this->posZ;
@@ -88,7 +88,7 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
         if( distX <= this->deltaX && distY <= this->deltaY && distZ <= this->deltaZ &&
             distX >=0.f && distY >=0.f && distZ >=0.f)
         {
-            probeIndices_level.push_back(j);
+            probeIndices_level.push_back((int)pos);
             distX_level.push_back( distX/dx );
             distY_level.push_back( distY/dx );
             distZ_level.push_back( distZ/dx );
@@ -106,4 +106,14 @@ void PlaneProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* p
     para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, para->getParD(level)->rho, 
     para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, 
     probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD);
+}
+
+void PlaneProbe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider)
+{
+    for(int lev=0; lev<=para->getMaxLevel(); lev++)   // tag this probe's nodes with WriteMacroVars on every level
+    {
+        const SPtr<ProbeStruct> probe = this->getProbeStruct(lev);
+        std::vector<uint> taggedNodes(probe->pointIndicesH, probe->pointIndicesH+probe->nIndices);
+        gridProvider->tagFluidNodeIndices(taggedNodes, CollisionTemplate::WriteMacroVars, lev);
+    }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
index 3440c01020f9b3505be7148024e47373b76648ff..180169707a6d7f3f7975f6a2bc4009f7c0aba527 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h
@@ -54,7 +54,7 @@ public:
     ): Probe(_probeName, 
              _outputPath,
              _tStartAvg, 
-             0,
+             _tStartAvg+1,
              _tAvg,
              _tStartOut, 
              _tOut,
@@ -72,6 +72,8 @@ public:
         this->deltaZ = _deltaZ; 
     }
 
+    void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override;
+
 private:
     bool isAvailableStatistic(Statistic _variable) override;
 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
index e78a98f02ac2093fc46b4daa4a2485ed1395275b..89e1f6b87687ed42c079415a5340f1d385c8d62c 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
@@ -75,20 +75,20 @@ void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
 {
 
     real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]);
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
     {    
         for(uint point=0; point<this->pointCoordsX.size(); point++)
         {
             real pointCoordX = this->pointCoordsX[point];
             real pointCoordY = this->pointCoordsY[point];
             real pointCoordZ = this->pointCoordsZ[point];
-            real distX = pointCoordX-para->getParH(level)->coordinateX[j];
-            real distY = pointCoordY-para->getParH(level)->coordinateY[j];
-            real distZ = pointCoordZ-para->getParH(level)->coordinateZ[j];
+            real distX = pointCoordX-para->getParH(level)->coordinateX[pos];
+            real distY = pointCoordY-para->getParH(level)->coordinateY[pos];
+            real distZ = pointCoordZ-para->getParH(level)->coordinateZ[pos];
             if( distX <=dx && distY <=dx && distZ <=dx &&
                 distX >0.f && distY >0.f && distZ >0.f)
             {
-                probeIndices_level.push_back(j);
+                probeIndices_level.push_back((int)pos);
                 distX_level.push_back( distX/dx );
                 distY_level.push_back( distY/dx );
                 distZ_level.push_back( distZ/dx );
@@ -140,4 +140,14 @@ void PointProbe::addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real po
     }
     printf("Added %u  points \n",  n_y*n_z);
 
+}
+
+void PointProbe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider)
+{
+    for(int lev=0; lev<=para->getMaxLevel(); lev++)   // tag this probe's nodes with WriteMacroVars on every level
+    {
+        const SPtr<ProbeStruct> probe = this->getProbeStruct(lev);
+        std::vector<uint> taggedNodes(probe->pointIndicesH, probe->pointIndicesH+probe->nIndices);
+        gridProvider->tagFluidNodeIndices(taggedNodes, CollisionTemplate::WriteMacroVars, lev);
+    }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
index 6a6fbe76f089acfafc22672dd3e9d71bd193a3b3..08c359705f03b20fbd3276fe209b6ff4d782a5e5 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h
@@ -64,6 +64,7 @@ public:
 
     void addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ);
     void addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, uint n_y, uint n_z);
+    void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override;
     
 private:
     bool isAvailableStatistic(Statistic _variable) override;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
index cc027b07bded01455437e65e08ccdcd51bcf7dc0..03c18f5a9a2133bec244053113209abc70469a2a 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
@@ -187,7 +187,7 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager*
     this->velocityRatio      = std::bind(&Parameter::getScaledVelocityRatio,        para, _1); 
     this->densityRatio       = std::bind(&Parameter::getScaledDensityRatio,         para, _1);
     this->forceRatio         = std::bind(&Parameter::getScaledForceRatio,           para, _1);
-    this->stressRatio        = std::bind(&Parameter::getScaledPressureRatio,        para, _1);
+    this->stressRatio        = std::bind(&Parameter::getScaledStressRatio,          para, _1);
     this->viscosityRatio     = std::bind(&Parameter::getScaledViscosityRatio,       para, _1);
     this->nondimensional     = std::bind(&Probe::getNondimensionalConversionFactor, this, _1);
 
@@ -315,6 +315,12 @@ void Probe::free(Parameter* para, CudaMemoryManager* cudaMemoryManager)
     }
 }
 
+void Probe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider)
+{
+    // Default implementation: a probe tags no fluid nodes. Derived probes override this where needed.
+}
+
+
 void Probe::addStatistic(Statistic variable)
 {
     if (!this->isAvailableStatistic(variable)) throw std::runtime_error("Probe::addStatistic(): Statistic not available for this probe type!");
@@ -329,6 +335,22 @@ void Probe::addStatistic(Statistic variable)
     }
 }
 
+std::string Probe::makeParallelFileName(int id, int t)
+{
+    std::string name = this->probeName;   // result: <probeName>_bin_ID_<id>_t_<t>.vtk
+    name.append("_bin_ID_").append(StringUtil::toString<int>(id)).append("_t_").append(StringUtil::toString<int>(t));
+    return name.append(".vtk");
+}
+
+std::string Probe::makeGridFileName(int level, int id, int t, uint part)
+{
+    // Result: <probeName>_bin_lev_<level>_ID_<id>_Part_<part>_t_<t>.vtk
+    std::string name = this->probeName;
+    name.append("_bin_lev_").append(StringUtil::toString<int>(level)).append("_ID_").append(StringUtil::toString<int>(id));
+    name.append("_Part_").append(StringUtil::toString<int>(part)).append("_t_").append(StringUtil::toString<int>(t));
+    return name.append(".vtk");
+}
+
 void Probe::addAllAvailableStatistics()
 {
     for( int var=0; var < int(Statistic::LAST); var++)
@@ -347,119 +369,76 @@ void Probe::write(Parameter* para, int level, int t)
     std::vector<std::string> fnames;
     for (uint i = 1; i <= numberOfParts; i++)
 	{
-        std::string fname = this->probeName + "_bin_lev_" + StringUtil::toString<int>(level)
-                                         + "_ID_" + StringUtil::toString<int>(para->getMyProcessID())
-                                         + "_Part_" + StringUtil::toString<int>(i);
-        if(!this->outputTimeSeries) fname += "_t_" + StringUtil::toString<int>(t_write);
-        fname += ".vtk";
-		fnames.push_back(fname);
-        this->fileNamesForCollectionFile.push_back(fname);
+        this->writeGridFile(para, level, t_write, i);
     }
-    this->writeGridFiles(para, level, fnames, t);
-
-    if(level == 0 && !this->outputTimeSeries) this->writeCollectionFile(para, t);
+    if(level == 0&& !this->outputTimeSeries) this->writeParallelFile(para, t);
 }
 
-void Probe::writeCollectionFile(Parameter* para, int t)
+void Probe::writeParallelFile(Parameter* para, int t)
 {
     int t_write = this->fileNameLU ? t: t/this->tOut; 
-    std::string filename = this->probeName + "_bin_ID_" + StringUtil::toString<int>(para->getMyProcessID()) 
-                                           + "_t_" + StringUtil::toString<int>(t_write) 
-                                           + ".vtk";
-
-    std::ofstream file;
-
-    file.open(this->outputPath + "/" + filename + ".pvtu" );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl;
-    file << "  <PUnstructuredGrid GhostLevel=\"1\">" << std::endl;
-
-    file << "    <PPointData>" << std::endl;
-
-    for(std::string varName: this->getVarNames()) //TODO
-    {
-        file << "       <DataArray type=\"Float64\" Name=\""<< varName << "\" /> " << std::endl;
-    }
-    file << "    </PPointData>" << std::endl;
-
-    file << "    <PPoints>" << std::endl;
-    file << "      <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl;
-    file << "    </PPoints>" << std::endl;
-
-    for( auto& fname : this->fileNamesForCollectionFile )
-    {
-        const auto filenameWithoutPath=fname.substr( fname.find_last_of('/') + 1 );
-        file << "    <Piece Source=\"" << filenameWithoutPath << ".bin.vtu\"/>" << std::endl;
-    }
-
-    file << "  </PUnstructuredGrid>" << std::endl;
-    file << "</VTKFile>" << std::endl;
+    std::string filename = this->outputPath + "/" + this->makeParallelFileName(para->getMyProcessID(), t_write);
 
-    //////////////////////////////////////////////////////////////////////////
+    std::vector<std::string> nodedatanames = this->getVarNames();
+    std::vector<std::string> cellNames;
 
-    file.close();
+    getWriter()->writeParallelFile(filename, fileNamesForCollectionFile, nodedatanames, cellNames);
 
     this->fileNamesForCollectionFile.clear();
 }
 
-void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>& fnames, int t)
+void Probe::writeGridFile(Parameter* para, int level, int t, uint part)
 {
+    std::string fname = this->outputPath + "/" + this->makeGridFileName(level, para->getMyProcessID(), t, part);
+
     std::vector< UbTupleFloat3 > nodes;
     std::vector< std::string > nodedatanames = this->getVarNames();
 
-    uint startpos = 0;
-    uint endpos = 0;
-    uint sizeOfNodes = 0;
     std::vector< std::vector< double > > nodedata(nodedatanames.size());
 
     SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level);
 
-    for (uint part = 0; part < fnames.size(); part++)
-    {        
-        startpos = part * para->getlimitOfNodesForVTK();
-        uint nDataPoints = this->outputTimeSeries? this->tProbe: probeStruct->nPoints;
-        sizeOfNodes = min(para->getlimitOfNodesForVTK(), nDataPoints - startpos);
-        endpos = startpos + sizeOfNodes;
+    uint startpos = (part-1) * para->getlimitOfNodesForVTK();
+    uint sizeOfNodes = min(para->getlimitOfNodesForVTK(), probeStruct->nPoints - startpos);
+    uint endpos = startpos + sizeOfNodes;
 
-        //////////////////////////////////////////////////////////////////////////
-        nodes.resize(sizeOfNodes);
+    //////////////////////////////////////////////////////////////////////////
+    nodes.resize(sizeOfNodes);
 
-        for (uint pos = startpos; pos < endpos; pos++)
-        {
-            nodes[pos-startpos] = makeUbTuple(  float(probeStruct->pointCoordsX[pos]),
-                                                float(probeStruct->pointCoordsY[pos]),
-                                                float(probeStruct->pointCoordsZ[pos]));
-        }
+    for (uint pos = startpos; pos < endpos; pos++)
+    {
+        nodes[pos-startpos] = makeUbTuple(  float(probeStruct->pointCoordsX[pos]),
+                                            float(probeStruct->pointCoordsY[pos]),
+                                            float(probeStruct->pointCoordsZ[pos]));
+    }
 
-        for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes);
+    for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes);
 
-        for( int var=0; var < int(Statistic::LAST); var++){           
-            if(this->quantities[var])
-            {
-                Statistic statistic = static_cast<Statistic>(var);
-                real coeff;
+    for( int var=0; var < int(Statistic::LAST); var++){           
+        if(this->quantities[var])
+        {
+            Statistic statistic = static_cast<Statistic>(var);
+            real coeff;
 
-                std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic);
-                uint n_arrs = uint(postProcessingVariables.size());
+            std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic);
+            uint n_arrs = uint(postProcessingVariables.size());
 
-                uint arrOff = probeStruct->arrayOffsetsH[var];
-                uint arrLen = probeStruct->nPoints;
+            uint arrOff = probeStruct->arrayOffsetsH[var];
+            uint arrLen = probeStruct->nPoints;
+
+            for(uint arr=0; arr<n_arrs; arr++)
+            {
+                coeff = postProcessingVariables[arr].conversionFactor(level);
                 
-                for(uint arr=0; arr<n_arrs; arr++)
+                for (uint pos = startpos; pos < endpos; pos++)
                 {
-                    coeff = postProcessingVariables[arr].conversionFactor(level);
-                    
-                    for (uint pos = startpos; pos < endpos; pos++)
-                    {
-                        nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff);
-                    }
+                    nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff);
                 }
             }
         }
-        WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(this->outputPath + "/" + fnames[part], nodes, nodedatanames, nodedata);
     }
+    std::string fullName = getWriter()->writeNodesWithNodeData(fname, nodes, nodedatanames, nodedata);
+    this->fileNamesForCollectionFile.push_back(fullName.substr(fullName.find_last_of('/') + 1));
 }
 
 std::vector<std::string> Probe::getVarNames()
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
index 9cb0bd43e27fb7a28cae9c363ce245fbd9cc5677..aaf294e87d23c64707a16692b9337d6e9ff9c896 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
@@ -49,6 +49,7 @@
 
 #include "PreCollisionInteractor/PreCollisionInteractor.h"
 #include "PointerDefinitions.h"
+#include "WbWriterVtkXmlBinary.h"
 
 //=======================================================================================
 //! \note How to add new Statistics 
@@ -153,12 +154,12 @@ public:
         PreCollisionInteractor()
     {
         if (_tStartOut<_tStartAvg)      throw std::runtime_error("Probe: tStartOut must be larger than tStartAvg!");
-        if (_tStartTmpAvg<_tStartAvg)   throw std::runtime_error("Probe: tStartTmpAvg must be larger than tStartAvg!");
     }
     
     void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) override;
     void interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, uint t) override;
     void free(Parameter* para, CudaMemoryManager* cudaMemoryManager) override;
+    void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override; //!> empty in Probe, see Probe.cu
 
     SPtr<ProbeStruct> getProbeStruct(int level){ return this->probeParams[level]; }
 
@@ -171,6 +172,8 @@ public:
     void setFileNameToNOut(){this->fileNameLU = false;}
     void setTStartTmpAveraging(uint _tStartTmpAveraging){this->tStartTmpAveraging = _tStartTmpAveraging;}
 
+protected:
+    virtual WbWriterVtkXmlBinary* getWriter(){ return WbWriterVtkXmlBinary::getInstance(); } //!> writer used by writeGridFile/writeParallelFile; returns the WbWriterVtkXmlBinary singleton, virtual so derived probes may override it
     real getNondimensionalConversionFactor(int level);
 
 private:
@@ -188,12 +191,15 @@ private:
                         int level);
     virtual void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) = 0;
 
-    void write(Parameter* para, int level, int t);
-    void writeCollectionFile(Parameter* para, int t);
-    void writeGridFiles(Parameter* para, int level, std::vector<std::string >& fnames, int t);
+    virtual void write(Parameter* para, int level, int t);
+    virtual void writeParallelFile(Parameter* para, int t);
+    virtual void writeGridFile(Parameter* para, int level, int t, uint part);
+
     std::vector<std::string> getVarNames();
-    
-private:
+    std::string makeGridFileName(int level, int id, int t, uint part);
+    std::string makeParallelFileName(int id, int t);
+
+protected:
     const std::string probeName;
     const std::string outputPath;
 
@@ -202,7 +208,6 @@ private:
     bool hasDeviceQuantityArray;    //!> flag initiating memCopy in Point and PlaneProbe. Other probes are only based on thrust reduce functions and therefore dont need explict memCopy in interact()
     bool outputTimeSeries;          //!> flag initiating overwrite of output vtk files, skipping collection files and limiting the length of the written data to the current time step (currently only used for WallModelProbe)
     std::vector<std::string> fileNamesForCollectionFile;
-    std::vector<std::string> varNames;
 
     bool fileNameLU = true; //!> if true, written file name contains time step in LU, else is the number of the written probe files
 
@@ -215,7 +220,6 @@ protected:
 
     uint tProbe = 0; //!> counter for number of probe evaluations. Only used when outputting timeseries
 
-
     std::function<real(int)> velocityRatio;
     std::function<real(int)> densityRatio;
     std::function<real(int)> forceRatio;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
index 81da15595baae55aa562bc77e24442a9258d992f..3341111c134ace7ca6ff64eeb7f87b38f8014656 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
@@ -171,11 +171,11 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
     {
         if (!para->getIsBodyForce()) throw std::runtime_error("WallModelProbe::findPoints(): bodyforce not allocated!");
         // Find all fluid nodes
-        for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+        for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
         {
-            if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID) 
+            if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) 
             {
-                probeIndices_level.push_back(j);
+                probeIndices_level.push_back((int)pos);
             }
         }
     }
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h
index d6464c5ca2aa60310cc6bb7ca0a210bc12e755ff..4ea90f74c7a0d57af4995e1b5874234967f1e901 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h
@@ -55,14 +55,17 @@ public:
         uint _tStartOut,
         uint _tOut
     ):  Probe(_probeName, 
-             _outputPath,
-             _tStartAvg,
-             _tStartTmpAvg,
-             _tAvg,
-             _tStartOut, 
-             _tOut,
-             false,
-             true){}
+            _outputPath,
+            _tStartAvg,
+            _tStartTmpAvg,
+            _tAvg,
+            _tStartOut, 
+            _tOut,
+            false,
+            true)
+    {
+        if (_tStartTmpAvg<_tStartAvg)   throw std::runtime_error("Probe: tStartTmpAvg must be larger than tStartAvg!");
+    }
 
 
     void setForceOutputToStress(bool _outputStress){ this->outputStress = _outputStress; }
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
index e43fb54a6b56b4d9a501269544cea000df31cdb7..60dbb2228e6d01fdabf7a6e1bfca786e2104d5b0 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
@@ -2,6 +2,7 @@
 
 #include "InitCompAD27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitCompAD27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitCompAD27::getNewInstance(std::shared_p
 
 void InitCompAD27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_Comp_AD_27 << < grid, threads >> >(	para->getParD(level)->neighborX,
-											para->getParD(level)->neighborY,
-											para->getParD(level)->neighborZ,
-											para->getParD(level)->typeOfGridNode,
-											para->getParD(level)->Conc,
-											para->getParD(level)->velocityX,
-											para->getParD(level)->velocityY,
-											para->getParD(level)->velocityZ,
-											para->getParD(level)->numberOfNodes,
-											para->getParD(level)->distributionsAD27.f[0],
-											para->getParD(level)->isEvenTimestep);
-	getLastCudaError("InitAD27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Comp_AD_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->Conc,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD27.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Comp_AD_27 execution failed");
 }
 
 bool InitCompAD27::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
index 8a53dff5c14adef69aa012bdf1d870d62a9749b2..8097ee13d9064c4104ead8cd8eb5ba529d8972fc 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
@@ -2,6 +2,7 @@
 
 #include "InitCompAD7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<InitCompAD7> InitCompAD7::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<InitCompAD7> InitCompAD7::getNewInstance(std::shared_ptr<Paramet
 
 void InitCompAD7::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_Comp_AD_7 << < grid, threads >> >(	para->getParD(level)->neighborX,
-										para->getParD(level)->neighborY,
-										para->getParD(level)->neighborZ,
-										para->getParD(level)->typeOfGridNode,
-										para->getParD(level)->Conc,
-										para->getParD(level)->velocityX,
-										para->getParD(level)->velocityY,
-										para->getParD(level)->velocityZ,
-										para->getParD(level)->numberOfNodes,
-										para->getParD(level)->distributionsAD7.f[0],
-										para->getParD(level)->isEvenTimestep);
-	getLastCudaError("InitAD7 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Comp_AD_7 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->Conc,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD7.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Comp_AD_7 execution failed");
 }
 
 bool InitCompAD7::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
index 23ec3e5293ec3a49bf632a720ab554d156dc9674..c4676f28f969e2db8ff7f1910ac784a1c0dab351 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "InitCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitCompSP27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,27 +11,12 @@ std::shared_ptr<PreProcessorStrategy> InitCompSP27::getNewInstance(std::shared_p
 
 void InitCompSP27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 
     if( ! para->getUseInitNeq() )
     {
-        LB_Init_Comp_SP_27 <<< grid, threads >>> (para->getParD(level)->neighborX,
+        LB_Init_Comp_SP_27 <<< grid.grid, grid.threads >>> (
+            para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
             para->getParD(level)->neighborZ,
             para->getParD(level)->typeOfGridNode,
@@ -41,11 +27,12 @@ void InitCompSP27::init(int level)
             para->getParD(level)->numberOfNodes,
             para->getParD(level)->distributions.f[0],
             para->getParD(level)->isEvenTimestep);
-        getLastCudaError("LBInitSP27 execution failed");
+        getLastCudaError("LB_Init_Comp_SP_27 execution failed");
     }
     else
     {
-        LB_Init_Comp_Neq_SP_27 <<< grid, threads >>> (para->getParD(level)->neighborX,
+        LB_Init_Comp_Neq_SP_27 <<< grid.grid, grid.threads >>> (
+            para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
             para->getParD(level)->neighborZ,
             para->getParD(level)->neighborInverse,
@@ -59,7 +46,7 @@ void InitCompSP27::init(int level)
             para->getParD(level)->omega,
             para->getParD(level)->isEvenTimestep);
         cudaDeviceSynchronize();
-        getLastCudaError("LBInitNeqSP27 execution failed");
+        getLastCudaError("LB_Init_Comp_Neq_SP_27 execution failed");
     }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
index cb6b40b4371a206c6d1e031822338621c4907be1..14d6b725337aa8b9af279bf794ff1c0912516b64 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
@@ -2,6 +2,7 @@
 
 #include "InitF3_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitF3::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitF3::getNewInstance(std::shared_ptr<Par
 
 void InitF3::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_F3 << < grid, threads >> >(	para->getParD(level)->neighborX,
-										para->getParD(level)->neighborY,
-										para->getParD(level)->neighborZ,
-										para->getParD(level)->typeOfGridNode,
-										para->getParD(level)->rho,
-										para->getParD(level)->velocityX,
-										para->getParD(level)->velocityY,
-										para->getParD(level)->velocityZ,
-										para->getParD(level)->numberOfNodes,
-										para->getParD(level)->g6.g[0],
-										para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitF3 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_F3 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->rho,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->g6.g[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_F3 execution failed");
 }
 
 bool InitF3::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
index 419ae80b96be57f8dc9c4ebecaccac0d435f00e0..ea700010960b11a1facdda18c35f220f43eb6a66 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
@@ -2,6 +2,7 @@
 
 #include "InitIncompAD27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitIncompAD27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitIncompAD27::getNewInstance(std::shared
 
 void InitIncompAD27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_Incomp_AD_27 << < grid, threads >> >(	para->getParD(level)->neighborX,
-												para->getParD(level)->neighborY,
-												para->getParD(level)->neighborZ,
-												para->getParD(level)->typeOfGridNode,
-												para->getParD(level)->Conc,
-												para->getParD(level)->velocityX,
-												para->getParD(level)->velocityY,
-												para->getParD(level)->velocityZ,
-												para->getParD(level)->numberOfNodes,
-												para->getParD(level)->distributionsAD27.f[0],
-												para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitIncompAD27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Incomp_AD_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->Conc,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD27.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Incomp_AD_27 execution failed");
 }
 
 bool InitIncompAD27::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
index 795cd0496a207e0861e35e4f310481950a037caf..d7c08e6932cacf2fb5a946010c1855212f1631fc 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
@@ -2,6 +2,7 @@
 
 #include "InitIncompAD7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitIncompAD7::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitIncompAD7::getNewInstance(std::shared_
 
 void InitIncompAD7::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);	
-
-	LB_Init_Incomp_AD_7 << < grid, threads >> >(	para->getParD(level)->neighborX,
-												para->getParD(level)->neighborY,
-												para->getParD(level)->neighborZ,
-												para->getParD(level)->typeOfGridNode,
-												para->getParD(level)->Conc,
-												para->getParD(level)->velocityX,
-												para->getParD(level)->velocityY,
-												para->getParD(level)->velocityZ,
-												para->getParD(level)->numberOfNodes,
-												para->getParD(level)->distributionsAD27.f[0],
-												para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitIncompAD7 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Incomp_AD_7 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->Conc,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD27.f[0], // NOTE(review): AD7 kernel receives distributionsAD27; InitCompAD7::init passes distributionsAD7.f[0] - confirm this is intended
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Incomp_AD_7 execution failed");
 }
 
 bool InitIncompAD7::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
index 0538c7ab89eb750a40cfc47486dc0891d4493976..078ad24f24659bf10a3dc9ed90bfd62b5e021187 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
@@ -2,6 +2,7 @@
 
 #include "InitSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitSP27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitSP27::getNewInstance(std::shared_ptr<P
 
 void InitSP27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_SP_27 << < grid, threads >> >(	para->getParD(level)->neighborX,
-										para->getParD(level)->neighborY,
-										para->getParD(level)->neighborZ,
-										para->getParD(level)->typeOfGridNode,
-										para->getParD(level)->rho,
-										para->getParD(level)->velocityX,
-										para->getParD(level)->velocityY,
-										para->getParD(level)->velocityZ,
-										para->getParD(level)->numberOfNodes,
-										para->getParD(level)->distributions.f[0],
-										para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitSP27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->rho,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_SP_27 execution failed");
 }
 
 bool InitSP27::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp
index a38535f7bdff0d87a5af74a69f0ed8255c647382..15813b7967a84e45f44eb4d286c41aa99c4ff343 100644
--- a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp
+++ b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp
@@ -9,12 +9,12 @@ void RestartObject::deserialize(const std::string &filename, std::shared_ptr<Par
 {
     deserialize_internal(filename);
 
-    for (int j = para->getCoarse(); j <= para->getFine(); j++) {
+    for (int index1 = para->getCoarse(); index1 <= para->getFine(); index1++) {
         std::vector<real> vec;
         fs.push_back(vec);
 
-        for (unsigned int i = 0; i < (para->getD3Qxx() * para->getParH(j)->numberOfNodes); i++) {
-            para->getParH(j)->distributions.f[0][i] = fs[j][i];
+        for (size_t index2 = 0; index2 < (para->getD3Qxx() * para->getParH(index1)->numberOfNodes); index2++) {
+            para->getParH(index1)->distributions.f[0][index2] = fs[index1][index2];
         }
     }
 }
@@ -24,15 +24,15 @@ void RestartObject::serialize(const std::string &filename, const std::shared_ptr
     if (fs.size() > 0) {
         clear(para);
     }
-    for (int j = para->getCoarse(); j <= para->getFine(); j++) {
+    for (int index1 = para->getCoarse(); index1 <= para->getFine(); index1++) {
         std::vector<real> vec;
         fs.push_back(vec);
 
-        for (unsigned int i = 0; i < (para->getD3Qxx() * para->getParH(j)->numberOfNodes); i++) {
-            if (UbMath::isNaN(para->getParH(j)->distributions.f[0][i])) {
-                fs[j].push_back((real)0.0);
+        for (size_t index2 = 0; index2 < (para->getD3Qxx() * para->getParH(index1)->numberOfNodes); index2++) {
+            if (UbMath::isNaN(para->getParH(index1)->distributions.f[0][index2])) {
+                fs[index1].push_back((real)0.0);
             } else {
-                fs[j].push_back(para->getParH(j)->distributions.f[0][i]);
+                fs[index1].push_back(para->getParH(index1)->distributions.f[0][index2]);
             }
         }
     }
diff --git a/src/lbm/CMakeLists.txt b/src/lbm/CMakeLists.txt
index afa90bdd3f95bb71cf7f1eda6407f9b38766072a..7a9a96ace1c7377b7ad0c67937464d1f2c00cce6 100644
--- a/src/lbm/CMakeLists.txt
+++ b/src/lbm/CMakeLists.txt
@@ -1,12 +1,12 @@
-if(BUILD_VF_CPU)
-    project(lbm LANGUAGES CXX)
 
-    vf_add_library(NAME lbm PUBLIC_LINK basics)
-    target_link_libraries(lbm PRIVATE project_warnings)
-
-    vf_add_tests()
-endif()
+vf_add_library(PUBLIC_LINK basics)  # NOTE(review): no NAME passed; target is referenced as "lbm" below - confirm the macro derives it
 
 if(BUILD_VF_GPU OR BUILD_VF_GKS)
+    set_target_properties(lbm PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON)
+    # Flag the sources below as CUDA (LANGUAGE CUDA) when a GPU or GKS build is requested.
+    set_source_files_properties(KernelParameter.cpp PROPERTIES LANGUAGE CUDA)
+    set_source_files_properties(CumulantChimera.cpp PROPERTIES LANGUAGE CUDA)
+    set_source_files_properties(BGK.cpp PROPERTIES LANGUAGE CUDA)
 endif()
+
+vf_add_tests()
\ No newline at end of file
diff --git a/src/lbm/KernelParameter.cpp b/src/lbm/KernelParameter.cpp
index e039214d218ef19f35e8adf927f36d3a6f1aa355..7bf5a369d0e5d4e673d79dcb30bc22fc2c330e68 100644
--- a/src/lbm/KernelParameter.cpp
+++ b/src/lbm/KernelParameter.cpp
@@ -5,11 +5,8 @@
 #include "MacroscopicQuantities.h"
 
 
-namespace vf
+namespace vf::lbm
 {
-namespace lbm
-{
-
 
 
 inline __host__ __device__ real Distribution27::getDensity_() const
@@ -17,8 +14,6 @@ inline __host__ __device__ real Distribution27::getDensity_() const
     return getDensity(f);
 }
 
-
-
 __host__ __device__ real abs_internal(real value)
 {
 #ifdef __CUDA_ARCH__
@@ -30,4 +25,3 @@ __host__ __device__ real abs_internal(real value)
 
 
 }
-}
diff --git a/src/lbm/KernelParameter.h b/src/lbm/KernelParameter.h
index 95226628110637f3794c8a1f7e6f6c1f6dda937b..18c4f2a4b20b84d9d519993f3ddb54cf612d4306 100644
--- a/src/lbm/KernelParameter.h
+++ b/src/lbm/KernelParameter.h
@@ -11,9 +11,7 @@
 #include <basics/Core/DataTypes.h>
 
 
-namespace vf
-{
-namespace lbm
+namespace vf::lbm
 {
 
 struct Distribution27
@@ -35,9 +33,6 @@ struct KernelParameter
 };
 
 
-
-
-}
 }
 
 #endif
diff --git a/src/lbm/constants/D3Q27.h b/src/lbm/constants/D3Q27.h
index b9c9d34f7004d1be2f90b6115f005fb2c8d0cbac..c799331815ff92b41b3daf8433bcc10d026a8738 100644
--- a/src/lbm/constants/D3Q27.h
+++ b/src/lbm/constants/D3Q27.h
@@ -7,91 +7,91 @@
 namespace vf::lbm::dir
 {
 
-static constexpr int STARTDIR = 0;
-static constexpr int ENDDIR   = 26;
+static constexpr size_t STARTDIR = 0;
+static constexpr size_t ENDDIR = 26;
 
 // used in the CPU and the GPU version
-static constexpr int DIR_000 = 0;
-static constexpr int DIR_P00 = 1;
-static constexpr int DIR_M00 = 2;
-static constexpr int DIR_0P0 = 3;
-static constexpr int DIR_0M0 = 4;
-static constexpr int DIR_00P = 5;
-static constexpr int DIR_00M = 6;
-static constexpr int DIR_PP0 = 7;
-static constexpr int DIR_MM0 = 8;
-static constexpr int DIR_PM0 = 9;
-static constexpr int DIR_MP0 = 10;
-static constexpr int DIR_P0P = 11;
-static constexpr int DIR_M0M = 12;
-static constexpr int DIR_P0M = 13;
-static constexpr int DIR_M0P = 14;
-static constexpr int DIR_0PP = 15;
-static constexpr int DIR_0MM = 16;
-static constexpr int DIR_0PM = 17;
-static constexpr int DIR_0MP = 18;
-static constexpr int DIR_PPP = 19;
-static constexpr int DIR_MPP = 20;
-static constexpr int DIR_PMP = 21;
-static constexpr int DIR_MMP = 22;
-static constexpr int DIR_PPM = 23;
-static constexpr int DIR_MPM = 24;
-static constexpr int DIR_PMM = 25;
-static constexpr int DIR_MMM = 26;
-
-static constexpr int INV_P00 = DIR_M00;
-static constexpr int INV_M00 = DIR_P00;
-static constexpr int INV_0P0 = DIR_0M0;
-static constexpr int INV_0M0 = DIR_0P0;
-static constexpr int INV_00P = DIR_00M;
-static constexpr int INV_00M = DIR_00P;
-static constexpr int INV_PP0 = DIR_MM0;
-static constexpr int INV_MM0 = DIR_PP0;
-static constexpr int INV_PM0 = DIR_MP0;
-static constexpr int INV_MP0 = DIR_PM0;
-static constexpr int INV_P0P = DIR_M0M;
-static constexpr int INV_M0M = DIR_P0P;
-static constexpr int INV_P0M = DIR_M0P;
-static constexpr int INV_M0P = DIR_P0M;
-static constexpr int INV_0PP = DIR_0MM;
-static constexpr int INV_0MM = DIR_0PP;
-static constexpr int INV_0PM = DIR_0MP;
-static constexpr int INV_0MP = DIR_0PM;
-static constexpr int INV_PPP = DIR_MMM;
-static constexpr int INV_MPP = DIR_PMM;
-static constexpr int INV_PMP = DIR_MPM;
-static constexpr int INV_MMP = DIR_PPM;
-static constexpr int INV_PPM = DIR_MMP;
-static constexpr int INV_MPM = DIR_PMP;
-static constexpr int INV_PMM = DIR_MPP;
-static constexpr int INV_MMM = DIR_PPP;
-
-static constexpr int SGD_P00 = 0;
-static constexpr int SGD_M00 = 1;
-static constexpr int SGD_0P0 = 2;
-static constexpr int SGD_0M0 = 3;
-static constexpr int SGD_00P = 4;
-static constexpr int SGD_00M = 5;
-static constexpr int SGD_PP0 = 6;
-static constexpr int SGD_MM0 = 7;
-static constexpr int SGD_PM0 = 8;
-static constexpr int SGD_MP0 = 9;
-static constexpr int SGD_P0P = 10;
-static constexpr int SGD_M0M = 11;
-static constexpr int SGD_P0M = 12;
-static constexpr int SGD_M0P = 13;
-static constexpr int SGD_0PP = 14;
-static constexpr int SGD_0MM = 15;
-static constexpr int SGD_0PM = 16;
-static constexpr int SGD_0MP = 17;
-static constexpr int SGD_PPP = 18;
-static constexpr int SGD_MPP = 19;
-static constexpr int SGD_PMP = 20;
-static constexpr int SGD_MMP = 21;
-static constexpr int SGD_PPM = 22;
-static constexpr int SGD_MPM = 23;
-static constexpr int SGD_PMM = 24;
-static constexpr int SGD_MMM = 25;
+static constexpr size_t DIR_000 = 0;
+static constexpr size_t DIR_P00 = 1;
+static constexpr size_t DIR_M00 = 2;
+static constexpr size_t DIR_0P0 = 3;
+static constexpr size_t DIR_0M0 = 4;
+static constexpr size_t DIR_00P = 5;
+static constexpr size_t DIR_00M = 6;
+static constexpr size_t DIR_PP0 = 7;
+static constexpr size_t DIR_MM0 = 8;
+static constexpr size_t DIR_PM0 = 9;
+static constexpr size_t DIR_MP0 = 10;
+static constexpr size_t DIR_P0P = 11;
+static constexpr size_t DIR_M0M = 12;
+static constexpr size_t DIR_P0M = 13;
+static constexpr size_t DIR_M0P = 14;
+static constexpr size_t DIR_0PP = 15;
+static constexpr size_t DIR_0MM = 16;
+static constexpr size_t DIR_0PM = 17;
+static constexpr size_t DIR_0MP = 18;
+static constexpr size_t DIR_PPP = 19;
+static constexpr size_t DIR_MPP = 20;
+static constexpr size_t DIR_PMP = 21;
+static constexpr size_t DIR_MMP = 22;
+static constexpr size_t DIR_PPM = 23;
+static constexpr size_t DIR_MPM = 24;
+static constexpr size_t DIR_PMM = 25;
+static constexpr size_t DIR_MMM = 26;
+
+static constexpr size_t INV_P00 = DIR_M00;
+static constexpr size_t INV_M00 = DIR_P00;
+static constexpr size_t INV_0P0 = DIR_0M0;
+static constexpr size_t INV_0M0 = DIR_0P0;
+static constexpr size_t INV_00P = DIR_00M;
+static constexpr size_t INV_00M = DIR_00P;
+static constexpr size_t INV_PP0 = DIR_MM0;
+static constexpr size_t INV_MM0 = DIR_PP0;
+static constexpr size_t INV_PM0 = DIR_MP0;
+static constexpr size_t INV_MP0 = DIR_PM0;
+static constexpr size_t INV_P0P = DIR_M0M;
+static constexpr size_t INV_M0M = DIR_P0P;
+static constexpr size_t INV_P0M = DIR_M0P;
+static constexpr size_t INV_M0P = DIR_P0M;
+static constexpr size_t INV_0PP = DIR_0MM;
+static constexpr size_t INV_0MM = DIR_0PP;
+static constexpr size_t INV_0PM = DIR_0MP;
+static constexpr size_t INV_0MP = DIR_0PM;
+static constexpr size_t INV_PPP = DIR_MMM;
+static constexpr size_t INV_MPP = DIR_PMM;
+static constexpr size_t INV_PMP = DIR_MPM;
+static constexpr size_t INV_MMP = DIR_PPM;
+static constexpr size_t INV_PPM = DIR_MMP;
+static constexpr size_t INV_MPM = DIR_PMP;
+static constexpr size_t INV_PMM = DIR_MPP;
+static constexpr size_t INV_MMM = DIR_PPP;
+
+static constexpr size_t SGD_P00 = 0;
+static constexpr size_t SGD_M00 = 1;
+static constexpr size_t SGD_0P0 = 2;
+static constexpr size_t SGD_0M0 = 3;
+static constexpr size_t SGD_00P = 4;
+static constexpr size_t SGD_00M = 5;
+static constexpr size_t SGD_PP0 = 6;
+static constexpr size_t SGD_MM0 = 7;
+static constexpr size_t SGD_PM0 = 8;
+static constexpr size_t SGD_MP0 = 9;
+static constexpr size_t SGD_P0P = 10;
+static constexpr size_t SGD_M0M = 11;
+static constexpr size_t SGD_P0M = 12;
+static constexpr size_t SGD_M0P = 13;
+static constexpr size_t SGD_0PP = 14;
+static constexpr size_t SGD_0MM = 15;
+static constexpr size_t SGD_0PM = 16;
+static constexpr size_t SGD_0MP = 17;
+static constexpr size_t SGD_PPP = 18;
+static constexpr size_t SGD_MPP = 19;
+static constexpr size_t SGD_PMP = 20;
+static constexpr size_t SGD_MMP = 21;
+static constexpr size_t SGD_PPM = 22;
+static constexpr size_t SGD_MPM = 23;
+static constexpr size_t SGD_PMM = 24;
+static constexpr size_t SGD_MMM = 25;
 
 struct countersForPointerChasing{
     uint counterInverse;
@@ -100,7 +100,7 @@ struct countersForPointerChasing{
     uint counterZ;
 };
 
-const std::map<const int, const countersForPointerChasing> mapForPointerChasing = 
+const std::map<const size_t, const countersForPointerChasing> mapForPointerChasing = 
 {
     {DIR_000, countersForPointerChasing{0, 0, 0, 0}},
     {DIR_P00, countersForPointerChasing{0, 1, 0, 0}},
diff --git a/src/lbm/constants/NumericConstants.h b/src/lbm/constants/NumericConstants.h
index 4918d49aaa0431de639ea8ba3320c4fa45e539d4..1c81192a615d7b99fb90671b7a553247d166147f 100644
--- a/src/lbm/constants/NumericConstants.h
+++ b/src/lbm/constants/NumericConstants.h
@@ -1,6 +1,7 @@
 #ifndef REAL_CONSTANT_H
 #define REAL_CONSTANT_H
 
+#include <cmath>
 
 namespace vf::lbm::constant
 {
@@ -18,6 +19,7 @@ static constexpr double c1o8 = 0.125;
 static constexpr double c1o9 = 0.111111111111111;
 static constexpr double c2o9 = 0.222222222222222;
 static constexpr double c4o9 = 0.444444444444444;
+static constexpr double c4o10 = 0.4;
 static constexpr double c1o10 = 0.1;
 static constexpr double c1o12 = 0.083333333333333;
 static constexpr double c1o16 = 0.0625;
@@ -48,6 +50,7 @@ static constexpr double c99o100 = 0.99;
 static constexpr double c1o126 = 0.007936507936508;
 static constexpr double c1o216 = 0.004629629629630;
 static constexpr double c5o4 = 1.25;
+static constexpr double c4o3 = 1.333333333333333;
 static constexpr double c9o4 = 2.25;
 static constexpr double c5o2 = 2.5;
 static constexpr double c9o2 = 4.5;
@@ -99,15 +102,15 @@ static constexpr double c72o1 = 72.;
 static constexpr double c84o1 = 84.;
 static constexpr double c88o1 = 88.;
 static constexpr double c96o1 = 96.;
-static constexpr double c100o1 = 10.;
-static constexpr double c130o1 = 13.;
-static constexpr double c152o1 = 15.;
-static constexpr double c166o1 = 16.;
-static constexpr double c195o1 = 19.;
-static constexpr double c216o1 = 21.;
-static constexpr double c264o1 = 26.;
-static constexpr double c290o1 = 29.;
-static constexpr double c367o1 = 36.;
+static constexpr double c100o1 = 100.;
+static constexpr double c130o1 = 130.;
+static constexpr double c152o1 = 152.;
+static constexpr double c166o1 = 166.;
+static constexpr double c195o1 = 195.;
+static constexpr double c216o1 = 216.;
+static constexpr double c264o1 = 264.;
+static constexpr double c290o1 = 290.;
+static constexpr double c367o1 = 367.;
 
 static constexpr double Op0000002 = 0.0000002;
 static constexpr double c10eM30 = 1e-30;
@@ -119,6 +122,11 @@ static constexpr double c2Pi = 6.28318530717;
 static constexpr double cPio180 = 1.74532925199e-2;
 static constexpr double c180oPi = 57.2957795131;
 
+static constexpr double one_over_sqrt2 = 1.0 / 1.4142135623730951; // 1 / sqrt(2) ~ 0.707106781, compile-time constant
+static constexpr double one_over_sqrt3 = 1.0 / 1.7320508075688772; // 1 / sqrt(3) ~ 0.577350269, compile-time constant
+static constexpr double sqrt2 = 1.4142135623730951;                // sqrt(2) rounded to the nearest double
+static constexpr double sqrt3 = 1.7320508075688772;                // sqrt(3) rounded to the nearest double
+
 #else
 static constexpr float c1o2 = 0.5f;
 static constexpr float c3o2 = 1.5f;
@@ -132,6 +140,7 @@ static constexpr float c1o8 = 0.125f;
 static constexpr float c1o9 = (1.0f / 9.0f);
 static constexpr float c2o9 = (2.0f / 9.0f);
 static constexpr float c4o9 = (4.0f / 9.0f);
+static constexpr float c4o10 = 0.4f;
 static constexpr float c1o10 = 0.1f;
 static constexpr float c1o12 = (1.0f / 12.0f);
 static constexpr float c1o16 = 0.0625f;
@@ -162,6 +171,7 @@ static constexpr float c99o100 = 0.99f;
 static constexpr float c1o126 = (1.0f / 126.0f);
 static constexpr float c1o216 = (1.0f / 216.0f);
 static constexpr float c5o4 = 1.25f;
+static constexpr float c4o3 = (4.0f / 3.0f);
 static constexpr float c9o4 = 2.25f;
 static constexpr float c5o2 = 2.5f;
 static constexpr float c9o2 = 4.5f;
@@ -233,6 +243,11 @@ static constexpr double c2Pi = 6.2831853071f;
 static constexpr float cPio180 = 1.74532925199e-2f;
 static constexpr float c180oPi = 57.2957795131f;
 
+static constexpr float one_over_sqrt2 = 1.0f / 1.4142135624f; // 1 / sqrt(2) ~ 0.707106781, compile-time constant
+static constexpr float one_over_sqrt3 = 1.0f / 1.7320508076f; // 1 / sqrt(3) ~ 0.577350269, compile-time constant
+static constexpr float sqrt2 = 1.4142135624f;                 // sqrt(2) rounded to the nearest float
+static constexpr float sqrt3 = 1.7320508076f;                 // sqrt(3) rounded to the nearest float
+
 #endif
 
 }
diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt
deleted file mode 100644
index 4142b7c3b1c46275c3257e3dfd657cc6b30c841d..0000000000000000000000000000000000000000
--- a/src/lbm/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-project(lbmCuda LANGUAGES CUDA CXX)
-
-
-vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm)
-
-
-set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON)
-
-
-set_source_files_properties(../KernelParameter.cpp PROPERTIES LANGUAGE CUDA)
-set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA)
-set_source_files_properties(../BGK.cpp PROPERTIES LANGUAGE CUDA)
diff --git a/utilities/setup_builder.py b/utilities/setup_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..94e4f805b3e05024742ead72e3ffb28c477c282f
--- /dev/null
+++ b/utilities/setup_builder.py
@@ -0,0 +1,34 @@
+from setuptools import build_meta
+
+class builder(build_meta._BuildMetaBackend):
+    """setuptools build backend that hands the frontend's ``config_settings`` to setup.py as ``config_args``."""
+
+    def run_setup(self, setup_script='setup.py'):
+        """Execute ``setup_script`` with the stored settings available to it as the global ``config_args``."""
+        # The locals defined here become the globals dict passed to exec() below.
+        __file__ = setup_script
+        __name__ = '__main__'
+
+        with build_meta._open_setup_script(__file__) as f:
+            code = f.read().replace(r'\r\n', r'\n')
+        args = locals()
+        args["config_args"] = getattr(self, "extra_args", {})  # empty if add_settings() has not run yet
+        exec(code, args)
+
+    def add_settings(self, config_settings):
+        """Remember ``config_settings`` (an empty dict when it is None/empty) for the next ``run_setup``."""
+        self.extra_args = config_settings or dict()
+
+    def build_wheel(self, wheel_directory, config_settings=None, metadata_directory=None):
+        """Store ``config_settings``, then delegate the wheel build to the parent backend."""
+        self.add_settings(config_settings)
+        return super().build_wheel(wheel_directory, config_settings, metadata_directory)
+
+    def build_sdist(self, sdist_directory, config_settings=None):
+        """Store ``config_settings``, then delegate the sdist build to the parent backend (not build_wheel)."""
+        self.add_settings(config_settings)
+        return super().build_sdist(sdist_directory, config_settings)
+
+build = builder()
+build_wheel = build.build_wheel
+build_sdist = build.build_sdist
\ No newline at end of file